* Session setup: clear memory, set the matrix size, move to the folder that
* holds the replication files, and switch off the -more- pause.
clear
set matsize 800
* The folder below exists only on the original author's machine. If the
* directory change fails, do not abort the whole do-file: report it and keep
* the current working directory, which must then contain the data files.
capture cd "C:\Users\jedwab\Desktop\Replication Files JJK 2023 JUE\Stata"
if _rc {
	display as error "Project folder not found; continuing in the current working directory:"
	pwd
}
set more off, permanently

* This do-file creates the data sets used for the analysis:
* finaljjk4 - main data set
* data_for_het - data set for the heterogeneity analysis
* dmvdata - data set for the DMV analysis 

********************************
* RAW CITY POPULATION DATA SET *
********************************

* We start with the Bairoch data set from Voigtlander and Voth (2013).
* Source: Voigtlander, Nico and Hans-Joachim Voth, “The Three Horsemen of Riches: Plague, War, and Urbanization in early modern Europe,” Review of Economic Studies, 2013, 80, 774–811.
* Load the Bairoch city panel and apply the variable names and labels used
* in the rest of this do-file.
use bairoch_data, clear
rename cname city_jjk
rename id city_id
drop page
rename pop pop_bairoch
label variable city_jjk "City name"
label variable city_id "City id (Bairoch)"
label variable year "Year"
label variable pop_bairoch "Population in thousands (source: Bairoch)"

* Translate the numeric country code into a country name.
* The names are listed in code order (1 to 26), so the position of a name in
* the list is its code.
generate countryname = ""
local countries `""Germany" "Austria" "Belgium" "Bulgaria" "Denmark" "Spain" "Finland""'
local countries `"`countries' "France" "United Kingdom" "Greece" "Hungary" "Ireland" "Italy""'
local countries `"`countries' "Luxembourg" "Malta" "Norway" "The Netherlands" "Poland" "Portugal""'
local countries `"`countries' "Romania" "Russia" "Sweden" "Switzerland" "Czech Republic""'
local countries `"`countries' "Yugoslavia" "Albania""'
local code = 0
foreach nm of local countries {
	local ++code
	replace countryname = "`nm'" if country == `code'
}
* LA VALETTE is assigned to Malta whatever its country code says.
replace countryname = "Malta" if city_jjk == "LA VALETTE"
label variable countryname "Country name"
drop country
codebook countryname

* Country + city key, used to tell apart cities that share a name.
generate countrycity = countryname+city_jjk
codebook countrycity
label variable countrycity "Country name + city name"
* 2,204 cities belonging to 26 countries.
codebook year
* 13 years from 800 to 1850.
count
* We have 2,204 cities x 13 years = 28,652 observations.
order city_jjk city_id countryname countrycity year
save finaljjk, replace
codebook city_jjk city_id countryname countrycity year

* Second source: a copy of the Bairoch data provided by Mauricio Drelichman,
* which carries more precise geographical coordinates.
* Only the city name and the two coordinate columns are kept.
import excel "drelichman.xls", sheet("villes_new1") firstrow clear
keep city NewLat NewLng
rename city city_jjk
sort city_jjk
save drelichman, replace

* Reload the Bairoch panel and harmonize the city names in city_jjk.
use finaljjk, clear
* We first need to change the name of various cities (to match the city names in the Drelichman data set).
* Step 1: remove every asterisk and every blank from the names
* (multi-word names therefore lose their spaces).
replace city_jjk = subinstr(city_jjk,"*","",.) 
replace city_jjk = subinstr(city_jjk," ","",.) 
* Step 2: one-by-one spelling changes. In each line the name on the right is
* the spelling currently stored and the name on the left is its replacement.
* The comparisons are exact string matches on the names produced by step 1.
replace city_jjk = "ARCOS-DE-LA-FRONTERA" if city_jjk == "ARCOS-DE-LA-PRONTERA"
replace city_jjk = "CARINI" if city_jjk == "CARJNI"
replace city_jjk = "LJUBLJANA" if city_jjk == "IJUBLJANA"
replace city_jjk = "ZITTAU" if city_jjk == "ZITTTAU"
replace city_jjk = "VILLANUEVA-DE-LOS-INFANTE" if city_jjk == "VILLANUEVA-DE-LOS-INFANTES"
replace city_jjk = "VERHNIJLOMOV" if city_jjk == "VERHNULOMOV"
replace city_jjk = "TUTRAKAN" if city_jjk == "TUTKAKAN"
replace city_jjk = "TIVERTON" if city_jjk == "TTVERTON"
replace city_jjk = "TONGEREN" if city_jjk == "TONGBREN"
replace city_jjk = "TALAVERA" if city_jjk == "TAIAVERA"
replace city_jjk = "SUZDAL'" if city_jjk == "SUZDAL"
replace city_jjk = "SANT'ANGELODIBROLO" if city_jjk == "SANTANGELODIBROLO"
replace city_jjk = "RJAZAN'" if city_jjk == "RJAZAN"
replace city_jjk = "PUTIVL'" if city_jjk == "PUTIVL"
replace city_jjk = "RIVE-DE-GIER" if city_jjk == "PJVE-DE-GIER"
replace city_jjk = "PERM'" if city_jjk == "PERM"
replace city_jjk = "NAHICEVAN'" if city_jjk == "NAHICEVAN"
replace city_jjk = "MONTESANT'ANGELO" if city_jjk == "MONTESANTANGELO"
replace city_jjk = "KOBELJAKI" if city_jjk == "KOBEIJAKI"
replace city_jjk = "KLAJPEDA" if city_jjk == "KLAIPEDA"
replace city_jjk = "KIZLJAR" if city_jjk == "KIZIJAR"
replace city_jjk = "KERC'" if city_jjk == "KERC"
replace city_jjk = "KALJAZIN" if city_jjk == "KALIAZIN"
replace city_jjk = "JAROSLAVL'" if city_jjk == "JAROSLAVL"
replace city_jjk = "DALIAS" if city_jjk == "DALLAS"
replace city_jjk = "COLMENAR-VIEJO" if city_jjk == "COLMENAR-VIEIO"
replace city_jjk = "CITTADICASTELLO" if city_jjk == "CITTADICASETELLO"
replace city_jjk = "CISTOPOL'" if city_jjk == "CISTOPOL"
replace city_jjk = "CASSANOALL'IONIO" if city_jjk == "CASSANOALLIONIO"
* The next two lines drop a leading apostrophe from the stored name.
replace city_jjk = "SHERTOGENBOSCH" if city_jjk == "'SHERTOGENBOSCH"
replace city_jjk = "SGRAVENHAGUE" if city_jjk == "'SGRAVENHAGUE"
replace city_jjk = "ELAT'MA" if city_jjk == "ELATMA"
replace city_jjk = "KAMENEC-PODOL'SKIJ" if city_jjk == "KAMENEC-PODOL'SKU"
replace city_jjk = "MOGILEV-PODOL'SKIJ" if city_jjk == "MOGILEV-PODOL'SKU"
replace city_jjk = "PEREJASLAV-HMEL'NICKIJ" if city_jjk == "PEREJASLAV-HMEL'NICKO"
* Note: countrycity is not modified by this block.
save finaljjk, replace
codebook city_jjk city_id countryname countrycity year  

* Look for city names that are shared by more than one city.
* Counting the observations per name, a name that belongs to two cities
* shows up with 26 observations (2 cities x 13 years).
generate test = 1
collapse (sum) test, by(city_jjk)
tabulate test
tabulate city_jjk if test == 26
* Three names are duplicated: Alba, Brest and Halle.
* Reload the panel (the collapse replaced it) and list the countries involved.
use finaljjk, clear
tabulate countryname if city_jjk == "ALBA"
* Italy and Spain
tabulate countryname if city_jjk == "BREST"
* France and Russia
tabulate countryname if city_jjk == "HALLE"
* Belgium and Germany
* Attach the Drelichman coordinates by city name.
* Because the merge key is the name alone, the cities that share a name
* (Alba, Brest, Halle) receive their coordinates by hand further down.
sort city_jjk
merge city_jjk using drelichman
tabulate _merge
* ok
drop _merge
* Rebuild the country+city key from the current names.
replace countrycity = countryname+city_jjk
codebook NewLat NewLng
* There are no missing coordinates.
rename NewLng longitude
rename NewLat latitude
label variable latitude "Latitude of the city"
label variable longitude "Longitude of the city"

* Hand-set coordinates for the duplicated names, one city at a time.
* ALBA (Italy / Spain)
replace latitude = 44.700915 if countrycity == "ItalyALBA"
replace longitude = 8.0356911 if countrycity == "ItalyALBA"
replace latitude = 40.61799 if countrycity == "SpainALBA"
replace longitude = -1.3457193 if countrycity == "SpainALBA"
* BREST (France / Russia)
replace latitude = 48.390394 if countrycity == "FranceBREST"
replace longitude = -4.486076 if countrycity == "FranceBREST"
replace latitude = 52.133333 if countrycity == "RussiaBREST"
replace longitude = 23.666667 if countrycity == "RussiaBREST"
* HALLE (Belgium / Germany)
replace latitude = 50.73757 if countrycity == "BelgiumHALLE"
replace longitude = 4.23251 if countrycity == "BelgiumHALLE"
replace latitude = 51.49698 if countrycity == "GermanyHALLE"
replace longitude = 11.968803 if countrycity == "GermanyHALLE"
save finaljjk, replace
codebook city_jjk city_id countryname countrycity year

* Some of the original coordinates are still wrong.
* new_coordinates.xlsx holds coordinates the authors verified themselves with
* Google Maps; its two coordinate columns get a _new suffix so they can sit
* next to the existing ones after the merge.
import excel "new_coordinates.xlsx", sheet("Sheet1") firstrow clear
rename latitude latitude_new
rename longitude longitude_new
sort countryname city_jjk
save new_coordinates, replace

* Bring the verified coordinates into the panel and let them override the
* existing values wherever a verified value is present.
use finaljjk, clear
sort countryname city_jjk
merge countryname city_jjk using new_coordinates, update
tabulate _merge
drop if _merge == 2
drop _merge
replace latitude = latitude_new if latitude_new != .
replace longitude = longitude_new if longitude_new != .

* Re-apply the hand-set coordinates of the cities with duplicated names, so
* that they hold after the override above.
* ALBA (Italy / Spain)
replace latitude = 44.700915 if countrycity == "ItalyALBA"
replace longitude = 8.0356911 if countrycity == "ItalyALBA"
replace latitude = 40.61799 if countrycity == "SpainALBA"
replace longitude = -1.3457193 if countrycity == "SpainALBA"
* BREST (France / Russia)
replace latitude = 48.390394 if countrycity == "FranceBREST"
replace longitude = -4.486076 if countrycity == "FranceBREST"
replace latitude = 52.133333 if countrycity == "RussiaBREST"
replace longitude = 23.666667 if countrycity == "RussiaBREST"
* HALLE (Belgium / Germany)
replace latitude = 50.73757 if countrycity == "BelgiumHALLE"
replace longitude = 4.23251 if countrycity == "BelgiumHALLE"
replace latitude = 51.49698 if countrycity == "GermanyHALLE"
replace longitude = 11.968803 if countrycity == "GermanyHALLE"
drop longitude_new latitude_new
codebook longitude latitude
save finaljjk, replace
codebook city_jjk city_id countryname countrycity year

* The main analysis keeps only some countries: various countries located to
* the east of Poland, the Czech Republic, Austria and Italy are dropped.
* Before that, the cities listed below are moved from "Czech Republic" to
* "Slovakia". The list is split over two inlist() calls because inlist()
* accepts at most 10 string arguments.
use finaljjk, clear
replace countryname = "Slovakia" if countryname == "Czech Republic" & ( ///
	inlist(city_jjk, "LEVOCA", "TRNAVA", "SKALICA", "NITRA", "MYJAVA", "KREMNICA", "KOSICE", "KOMARNO") | ///
	inlist(city_jjk, "IGLO", "GELNICA", "ERSEKUJVAR", "EPERJES", "BREZOVA", "BRATISLAVA", "BANSKASTIAVNICA", "BANSKABYSTRICA"))
save finaljjk, replace
codebook city_jjk city_id countryname countrycity year

* Main sample of cities (16 countries of Western Europe): every city whose
* country is in the list below is removed, and the result is saved as finaljjk1.
* Two inlist() calls are used because inlist() accepts at most 10 string arguments.
use finaljjk, clear
drop if inlist(countryname, "Albania", "Bulgaria", "Finland", "Greece", "Hungary", "Romania") | ///
	inlist(countryname, "Russia", "Malta", "Yugoslavia", "Slovakia", "Poland")
codebook countryname
codebook countrycity
count
sort countryname city_jjk
save finaljjk1, replace
codebook city_jjk city_id countryname countrycity year

*********************************************************************
* ADDING THE CHRISTAKOS & OTHER CITIES FOR WHICH WE HAVE POPULATION *
*********************************************************************

* The excel file "nonbairoch" was built from the Christakos et al book.
* Sion is the only non-Bairoch, non-Christakos city added by the authors.
import excel "nonbairoch.xls", sheet("Feuil1") firstrow clear
* Three of its cities are already in the Bairoch data set, so they are removed here.
drop if inlist(city_jjk, "SAINT DENIS", "SYRACUSE", "VIC")
keep city_id city_jjk countryname
generate nonbairoch = 1
sort city_jjk
save nonbairoch, replace
count
* 76

* Expand the city list into a city x year panel: each city is paired with
* every year listed in year.xls (the years of the Bairoch database).
import excel "year.xls", sheet("Feuil1") firstrow clear
save year, replace
use nonbairoch, clear
cross using year
tabulate year
sort city_jjk year
save nonbairoch, replace

* Population of these cities, taken from various sources and especially
* Christakos et al. The sheet is wide (one pop column per year) and is
* reshaped to one row per city and year.
import excel "extra pop christakos cities for stata.xls", sheet("Feuil1") firstrow clear
reshape long pop, i(city_jjk) j(year)
rename pop pop_christakos
label variable pop_christakos "Population in thousands for Christakos cities (various sources)"
sort city_jjk year
save extra_christakos_cities, replace
count if year == 1300
* 14

* Coordinates of the Christakos cities, without the three cities that are
* removed from the non-Bairoch list and without the mortality variable.
use christakos_coord, clear
drop if inlist(city_jjk, "SAINT DENIS", "SYRACUSE", "VIC")
drop mortality
sort city_jjk
save christakos_coord2, replace

* Put the pieces together for the non-Bairoch cities:
* the city x year panel, then population, then coordinates.
use nonbairoch, clear
* 76 cities x all the years
sort city_jjk year
* Population, matched on city and year.
merge city_jjk year using extra_christakos_cities
tabulate _merge
drop _merge
* Coordinates, matched on city; rows that exist only in the coordinate file are dropped.
sort city_jjk
merge city_jjk using christakos_coord2
tabulate _merge
drop if _merge == 2
drop _merge
rename christakos_lat latitude
rename christakos_long longitude
codebook longitude latitude
save nonbairoch, replace
describe
tabulate year, missing

* Stack the non-Bairoch cities under the main sample.
* nonbairoch is 0 for the rows already in finaljjk1; the appended rows carry
* the value stored in the nonbairoch file.
use finaljjk1, clear
describe
generate nonbairoch = 0
append using nonbairoch
* Appended rows have no country+city key yet.
replace countrycity = countryname+city_jjk if countrycity == ""
label variable nonbairoch "City not in the original Bairoch sample"

* Second- to fourth-order terms of each coordinate, built by repeated multiplication.
foreach coord in longitude latitude {
	generate `coord'2 = `coord'*`coord'
	generate `coord'3 = `coord'*`coord'*`coord'
	generate `coord'4 = `coord'*`coord'*`coord'*`coord'
}
label variable longitude2 "Longitude of the city: Square"
label variable longitude3 "Longitude of the city: Cube"
label variable longitude4 "Longitude of the city: Fourth Order"
label variable latitude2 "Latitude of the city: Square"
label variable latitude3 "Latitude of the city: Cube"
label variable latitude4 "Latitude of the city: Fourth Order"
codebook *
order city* country* year nonbairoch pop_bairoch pop_christakos longitude* latitude*
save finaljjk1, replace
codebook countryname
tabulate countryname
codebook longitude latitude
tabulate year, missing
* We have 1802 cities in each year.
 
*******************************
* ADDITIONAL CITY INFORMATION *
*******************************

* The Bairoch data set is the main source, and it is corrected with Chandler (1974),
* which is more specific about the sources used to measure city population.
* Various other sources are also used to improve the city population data; details are given below.

* pop starts from the Bairoch figure and falls back on the Christakos figure
* where the Bairoch figure is missing.
use finaljjk1, clear
generate pop = pop_bairoch
replace pop = pop_christakos if pop == .
codebook pop
label variable pop "Population in thousands (various sources)"
save finaljjk1, replace
codebook longitude latitude
* List any city that still has no longitude.
tabulate city_jjk countryname if longitude == .
* The corrections concentrate on the top 100 cities in 1300: they make up the
* main data set and are the most likely to drive the results.

use finaljjk1, clear
* Add the city population figures from Chandler (data set chandlerpop),
* matched on country, city and year. Rows found only in chandlerpop are dropped.
sort countryname city_jjk year
merge countryname city_jjk year using chandlerpop
tabulate _merge
* ok
drop if _merge == 2
drop _merge
* remipop starts as a copy of pop. It is called remipop for the time being because
* the data was corrected by one of the authors, Remi Jedwab, by comparing the
* Bairoch and Chandler data sets one by one.
generate remipop = pop
* The corrections are made by decreasing order of population (1300) in Bairoch.

* Manual corrections, cities 1 to 6 (GRANADA to FIRENZE).
* Each line overwrites chandlerpop or remipop for one city, selected by name only.
* A line of the form "replace remipop = chandlerpop if ..." copies whatever
* chandlerpop holds for the selected rows, missing values included.

** 1 GRANADA **

replace chandlerpop = 60 if city_jjk == "GRANADA" & year == 1100
replace chandlerpop = 35 if city_jjk == "GRANADA" & year == 1700

** 2 PARIS **

replace chandlerpop = 530 if city_jjk == "PARIS" & year == 1700
replace chandlerpop = 40 if city_jjk == "PARIS" & year == 1100
replace remipop = 228 if city_jjk == "PARIS" & year == 1300

** 3 VENEZIA **

replace chandlerpop = 60 if city_jjk == "VENEZIA" & year == 1100
replace chandlerpop = 144 if city_jjk == "VENEZIA" & year == 1700
* remipop takes the chandlerpop value for 1400 and 1500 only.
replace remipop = chandlerpop if city_jjk == "VENEZIA" & year == 1400
replace remipop = chandlerpop if city_jjk == "VENEZIA" & year == 1500

** 4 GENOVA **

replace chandlerpop = 23 if city_jjk == "GENOVA" & year == 1100
replace chandlerpop = 30 if city_jjk == "GENOVA" & year == 1200
replace chandlerpop = 85 if city_jjk == "GENOVA" & year == 1300
replace chandlerpop = 66 if city_jjk == "GENOVA" & year == 1400
replace chandlerpop = 62 if city_jjk == "GENOVA" & year == 1500
replace chandlerpop = 65 if city_jjk == "GENOVA" & year == 1600
replace chandlerpop = 67 if city_jjk == "GENOVA" & year == 1700
* remipop takes the chandlerpop values set above for 1300, 1400 and 1500.
replace remipop = chandlerpop if city_jjk == "GENOVA" & year == 1300
replace remipop = chandlerpop if city_jjk == "GENOVA" & year == 1400
replace remipop = chandlerpop if city_jjk == "GENOVA" & year == 1500

** 5 MILANO **

replace chandlerpop = 25 if city_jjk == "MILANO" & year == 800
replace chandlerpop = 30 if city_jjk == "MILANO" & year == 900
replace chandlerpop = 30 if city_jjk == "MILANO" & year == 1000
replace chandlerpop = 42 if city_jjk == "MILANO" & year == 1100
replace chandlerpop = 60 if city_jjk == "MILANO" & year == 1200
replace chandlerpop = 60 if city_jjk == "MILANO" & year == 1300
replace chandlerpop = 80 if city_jjk == "MILANO" & year == 1400
replace chandlerpop = 89 if city_jjk == "MILANO" & year == 1500
replace chandlerpop = 107 if city_jjk == "MILANO" & year == 1600
replace chandlerpop = 113 if city_jjk == "MILANO" & year == 1700
replace chandlerpop = 110 if city_jjk == "MILANO" & year == 1750
replace chandlerpop = 122 if city_jjk == "MILANO" & year == 1800
replace chandlerpop = 183 if city_jjk == "MILANO" & year == 1850
* No year condition: remipop is replaced by chandlerpop in every MILANO row.
replace remipop = chandlerpop if city_jjk == "MILANO"

** 6 FIRENZE **

replace chandlerpop = 68 if city_jjk == "FIRENZE" & year == 1700
* remipop is set directly for 1100, 1200 and 1300.
replace remipop = 15 if city_jjk == "FIRENZE" & year == 1200
replace remipop = 80 if city_jjk == "FIRENZE" & year == 1300
replace remipop = 5 if city_jjk == "FIRENZE" & year == 1100
* Manual corrections, cities 7 to 15 (SEVILLA to TOLEDO).
* Each line overwrites chandlerpop or remipop for one city, selected by name only.
* A line of the form "replace remipop = chandlerpop if ..." copies whatever
* chandlerpop holds for the selected rows, missing values included.

** 7 SEVILLA **

replace chandlerpop = 120 if city_jjk == "SEVILLA" & year == 1100
replace chandlerpop = 80 if city_jjk == "SEVILLA" & year == 1700

** 8 CORDOBA **

replace chandlerpop = 60 if city_jjk == "CORDOBA" & year == 1100
replace chandlerpop = 27 if city_jjk == "CORDOBA" & year == 1700
replace remipop = 36 if city_jjk == "CORDOBA" & year == 1400

** 9 NAPOLI **

replace chandlerpop = 32 if city_jjk == "NAPOLI" & year == 1100
replace chandlerpop = 35 if city_jjk == "NAPOLI" & year == 1200
replace chandlerpop = 40 if city_jjk == "NAPOLI" & year == 1300
replace chandlerpop = 40 if city_jjk == "NAPOLI" & year == 1400
replace chandlerpop = 114 if city_jjk == "NAPOLI" & year == 1500
replace chandlerpop = 224 if city_jjk == "NAPOLI" & year == 1600
replace chandlerpop = 207 if city_jjk == "NAPOLI" & year == 1700

** 10 KOELN **

replace chandlerpop = 35 if city_jjk == "KOELN" & year == 1100
replace chandlerpop = 37 if city_jjk == "KOELN" & year == 1700

** 11 PALERMO **

* The same two values are written to both chandlerpop and remipop.
replace chandlerpop = 75 if city_jjk == "PALERMO" & year == 1000
replace chandlerpop = 90 if city_jjk == "PALERMO" & year == 1100
replace remipop = 75 if city_jjk == "PALERMO" & year == 1000
replace remipop = 90 if city_jjk == "PALERMO" & year == 1100

** 12 SIENNA **

* The city is stored under the name SIENA.
replace chandlerpop = 15 if city_jjk == "SIENA" & year == 1200
replace chandlerpop = 21 if city_jjk == "SIENA" & year == 1300
replace chandlerpop = 15 if city_jjk == "SIENA" & year == 1400
replace chandlerpop = 22 if city_jjk == "SIENA" & year == 1500
replace chandlerpop = 19 if city_jjk == "SIENA" & year == 1600
replace chandlerpop = 16 if city_jjk == "SIENA" & year == 1700
* Applies to every SIENA row up to 1500, including the years before 1200.
replace remipop = chandlerpop  if city_jjk == "SIENA" & year <= 1500

** 13 BARCELONA **

replace chandlerpop = 12 if city_jjk == "BARCELONA" & year == 1100
replace chandlerpop = 15 if city_jjk == "BARCELONA" & year == 1200
replace chandlerpop = 43 if city_jjk == "BARCELONA" & year == 1700

** 14 VALENCIA **

replace chandlerpop = 13 if city_jjk == "VALENCIA" & year == 1100
replace chandlerpop = 21 if city_jjk == "VALENCIA" & year == 1200
replace chandlerpop = 40 if city_jjk == "VALENCIA" & year == 1700

** 15 TOLEDO **

replace chandlerpop = 31 if city_jjk == "TOLEDO" & year == 1100
replace chandlerpop = 32 if city_jjk == "TOLEDO" & year == 1200
replace chandlerpop = 25 if city_jjk == "TOLEDO" & year == 1700
replace chandlerpop = 31 if city_jjk == "TOLEDO" & year == 1000
* NOTE(review): the range copy on the last line also covers year 1600, so the
* value 57 assigned to remipop on the next line is overwritten by chandlerpop.
* Possibly the two lines were meant to run in the opposite order -- confirm.
replace remipop = 57 if city_jjk == "TOLEDO" & year == 1600
replace remipop = chandlerpop if city_jjk == "TOLEDO" & year >= 1000 & year <= 1600
* Manual corrections, cities 16 to 26 (GENT to MONTPELLIER).
* Each line overwrites chandlerpop or remipop for one city, selected by name only.
* A line of the form "replace remipop = chandlerpop if ..." copies whatever
* chandlerpop holds for the selected rows, missing values included.

** 16 GENT **

replace chandlerpop = 12 if city_jjk == "GENT" & year == 1100
replace chandlerpop = 25 if city_jjk == "GENT" & year == 1200
replace chandlerpop = 49 if city_jjk == "GENT" & year == 1700

** 17 AQUILA **

* There is no estimate provided in Chandler.
* On Wikipedia, it can be read: "The city's construction was begun by Frederick II, Holy Roman Emperor and King of Sicily, out of several already existing villages (ninety-nine, according to local tradition; see Amiternum), as a bulwark against the power of the papacy. The name of Aquila means "Eagle" in Italian. Construction was completed in 1254 under Frederick's son, Conrad IV of Germany."
* So Bairoch's estimate could be right.

** 18 BOLOGNA **

replace chandlerpop = 35 if city_jjk == "BOLOGNA" & year == 1200
replace chandlerpop = 63 if city_jjk == "BOLOGNA" & year == 1600
replace chandlerpop = 63 if city_jjk == "BOLOGNA" & year == 1700

** 19 BRUGGE **

replace chandlerpop = 12 if city_jjk == "BRUGGE" & year == 1000
replace chandlerpop = 15 if city_jjk == "BRUGGE" & year == 1100
replace chandlerpop = 25 if city_jjk == "BRUGGE" & year == 1200
replace chandlerpop = 35 if city_jjk == "BRUGGE" & year == 1700
* Applies to every BRUGGE row up to 1500, including the years before 1000.
replace remipop = chandlerpop if city_jjk == "BRUGGE" & year <= 1500

** 20 CREMONA **

replace chandlerpop = 15 if city_jjk == "CREMONA" & year == 1100
replace chandlerpop = 25 if city_jjk == "CREMONA" & year == 1200
replace chandlerpop = 37 if city_jjk == "CREMONA" & year == 1600
replace chandlerpop = 23 if city_jjk == "CREMONA" & year == 1700

** 21 MALAGA **

replace chandlerpop = 11 if city_jjk == "MALAGA" & year == 1100
replace chandlerpop = 20 if city_jjk == "MALAGA" & year == 1200
replace chandlerpop = 30 if city_jjk == "MALAGA" & year == 1500
replace chandlerpop = 30 if city_jjk == "MALAGA" & year == 1700
replace remipop = chandlerpop if city_jjk == "MALAGA" & year >= 1100 & year <= 1600

** 22 PISA **

replace chandlerpop = 9 if city_jjk == "PISA" & year == 1100
replace chandlerpop = 20 if city_jjk == "PISA" & year == 1200
replace chandlerpop = 11 if city_jjk == "PISA" & year == 1600
replace chandlerpop = 13 if city_jjk == "PISA" & year == 1700

** 23 FERRARA **

replace chandlerpop = 12 if city_jjk == "FERRARA" & year == 1100
replace chandlerpop = 18 if city_jjk == "FERRARA" & year == 1200
replace chandlerpop = 27 if city_jjk == "FERRARA" & year == 1700
replace remipop = chandlerpop if city_jjk == "FERRARA" & year >= 1100 & year <= 1700

** 24 LISBOA **

replace chandlerpop = 15 if city_jjk == "LISBOA" & year == 1100
replace chandlerpop = 15 if city_jjk == "LISBOA" & year == 1200
replace chandlerpop = 188 if city_jjk == "LISBOA" & year == 1700

** 25 LONDON **

replace chandlerpop = 20 if city_jjk == "LONDON" & year == 1100
replace chandlerpop = 40 if city_jjk == "LONDON" & year == 1200
replace chandlerpop = 550 if city_jjk == "LONDON" & year == 1700
* Order matters here: the range copy runs first, then 1300 is overridden with 60.
replace remipop = chandlerpop if city_jjk == "LONDON" & year >= 1100 & year <= 1500
replace remipop = 60 if city_jjk == "LONDON" & year == 1300
* We use Campbell for 1300

** 26 MONTPELLIER **

replace chandlerpop = 4 if city_jjk == "MONTPELLIER" & year == 1100
replace chandlerpop = 15 if city_jjk == "MONTPELLIER" & year == 1200
replace chandlerpop = 15 if city_jjk == "MONTPELLIER" & year == 1600
replace chandlerpop = 23 if city_jjk == "MONTPELLIER" & year == 1700
* Manual corrections, cities 27 to 35 (PADOVA to DOUAI).
* Each line overwrites chandlerpop, remipop or pop for one city, selected by name only.
* A line of the form "replace remipop = chandlerpop if ..." copies whatever
* chandlerpop holds for the selected rows, missing values included.

** 27 PADOVA **

replace chandlerpop = 15 if city_jjk == "PADOVA" & year == 1100
replace chandlerpop = 20 if city_jjk == "PADOVA" & year == 1200
replace chandlerpop = 37 if city_jjk == "PADOVA" & year == 1700

** 28 ROUEN **

replace chandlerpop = 20 if city_jjk == "ROUEN" & year == 1100
replace chandlerpop = 40 if city_jjk == "ROUEN" & year == 1200
replace chandlerpop = 63 if city_jjk == "ROUEN" & year == 1700

** 29 ST-OMER **

replace chandlerpop = 20 if city_jjk == "ST-OMER" & year == 1200
replace chandlerpop = 17 if city_jjk == "ST-OMER" & year == 1700

** 30 PERUGIA **

* There is no estimate provided in Chandler.

** 31 ANGERS **

replace chandlerpop = 15 if city_jjk == "ANGERS" & year == 1100
replace chandlerpop = 20 if city_jjk == "ANGERS" & year == 1200
replace chandlerpop = 23 if city_jjk == "ANGERS" & year == 1700
* The 1400 value is explicitly set to missing.
replace chandlerpop = . if city_jjk == "ANGERS" & year == 1400
replace chandlerpop = 16 if city_jjk == "ANGERS" & year == 1500
replace chandlerpop = 20 if city_jjk == "ANGERS" & year == 1600

** 32 MARSEILLE **

replace chandlerpop = 13 if city_jjk == "MARSEILLE" & year == 1100
replace chandlerpop = 20 if city_jjk == "MARSEILLE" & year == 1200
replace chandlerpop = 75 if city_jjk == "MARSEILLE" & year == 1700

** 33 ARRAS **

* The 1100 value is explicitly set to missing, and the range copy on the last
* line carries that missing value into remipop for 1100.
replace chandlerpop = . if city_jjk == "ARRAS" & year == 1100
replace chandlerpop = 6 if city_jjk == "ARRAS" & year == 1200
replace chandlerpop = 20 if city_jjk == "ARRAS" & year == 1300
replace chandlerpop = 18 if city_jjk == "ARRAS" & year == 1400
replace chandlerpop = 12 if city_jjk == "ARRAS" & year == 1500
replace chandlerpop = 15 if city_jjk == "ARRAS" & year == 1600
replace chandlerpop = 17 if city_jjk == "ARRAS" & year == 1700
replace remipop = chandlerpop if city_jjk == "ARRAS" & year >= 1100 & year <= 1700

** 34 BORDEAUX **

replace chandlerpop = . if city_jjk == "BORDEAUX" & year == 1100
replace chandlerpop = 15 if city_jjk == "BORDEAUX" & year == 1200
replace chandlerpop = 40 if city_jjk == "BORDEAUX" & year == 1700
replace chandlerpop = 30 if city_jjk == "BORDEAUX" & year == 1500
replace remipop = 10 if city_jjk == "BORDEAUX" & year == 1100

** 35 DOUAI **

* NOTE(review): these two lines edit pop, whereas the other corrections visible
* in this section edit chandlerpop or remipop. remipop was created as a copy of
* pop before the corrections started, so it is not changed by these lines.
* Confirm that pop, and not remipop, is the intended target.
replace pop = 20 if city_jjk == "DOUAI" & year == 1300
replace pop = 17.5 if city_jjk == "DOUAI" & year == 1400
* Manual corrections, cities 36 to 41 (ERFURT to NARBONNE).
* Each line overwrites chandlerpop or remipop for one city, selected by name only.
* A line of the form "replace remipop = chandlerpop if ..." copies whatever
* chandlerpop holds for the selected rows, missing values included.

** 36 ERFURT **

replace chandlerpop = . if city_jjk == "ERFURT" & year == 1100
replace chandlerpop = 21 if city_jjk == "ERFURT" & year == 1200
replace chandlerpop = 19 if city_jjk == "ERFURT" & year == 1600
replace chandlerpop = 18 if city_jjk == "ERFURT" & year == 1700

** 37 IEPER (YPRES) **

replace chandlerpop = . if city_jjk == "IEPER" & year == 1100
replace chandlerpop = 40 if city_jjk == "IEPER" & year == 1200
replace chandlerpop = 30 if city_jjk == "IEPER" & year == 1300
replace chandlerpop = 20 if city_jjk == "IEPER" & year == 1400
replace chandlerpop = 10 if city_jjk == "IEPER" & year == 1500
replace chandlerpop = 5 if city_jjk == "IEPER" & year == 1600
replace chandlerpop = 12 if city_jjk == "IEPER" & year == 1700
replace remipop = chandlerpop if city_jjk == "IEPER" & year >= 1200 & year <= 1500

** 38 LILLE **

replace chandlerpop = 8 if city_jjk == "LILLE" & year == 1100
replace chandlerpop = 12.5 if city_jjk == "LILLE" & year == 1200
replace chandlerpop = 15 if city_jjk == "LILLE" & year == 1300
* NOTE(review): year 1400 is assigned twice below (15, then 25), so the first
* assignment has no effect and 25 is the value copied into remipop for 1400.
* Possibly the second line was meant for year 1500 -- confirm against the source.
replace chandlerpop = 15 if city_jjk == "LILLE" & year == 1400
replace chandlerpop = 25 if city_jjk == "LILLE" & year == 1400
replace chandlerpop = 55 if city_jjk == "LILLE" & year == 1700
replace remipop = chandlerpop if city_jjk == "LILLE" & year >= 1100 & year <= 1400

** 39 MANTUA **

replace chandlerpop = 12 if city_jjk == "MANTUA" & year == 1100
replace chandlerpop = 12 if city_jjk == "MANTUA" & year == 1200
replace chandlerpop = 18.5 if city_jjk == "MANTUA" & year == 1300
replace chandlerpop = 21 if city_jjk == "MANTUA" & year == 1700

** 40 METZ **

replace chandlerpop = 21 if city_jjk == "METZ" & year == 1100
replace chandlerpop = 23 if city_jjk == "METZ" & year == 1200
replace chandlerpop = 28 if city_jjk == "METZ" & year == 1700

** 41 NARBONNE **

replace chandlerpop = . if city_jjk == "NARBONNE" & year == 1100
replace chandlerpop = 31 if city_jjk == "NARBONNE" & year == 1200
replace chandlerpop = 8 if city_jjk == "NARBONNE" & year == 1700
replace chandlerpop = 1 if city_jjk == "NARBONNE" & year == 1400
* Narbonne declined not just because of the Black Death, but above all because of a change in the Aude River.

* Manual corrections, cities 42 to 49 (PAVIA to AUGSBURG).
* Each line overwrites chandlerpop or remipop for one city, selected by name only.
* A line of the form "replace remipop = chandlerpop if ..." copies whatever
* chandlerpop holds for the selected rows, missing values included.

** 42 PAVIA **

replace chandlerpop = 22 if city_jjk == "PAVIA" & year == 800
replace chandlerpop = 22 if city_jjk == "PAVIA" & year == 900
replace chandlerpop = 30 if city_jjk == "PAVIA" & year == 1000
replace chandlerpop = 27.5 if city_jjk == "PAVIA" & year == 1100
replace chandlerpop = 25 if city_jjk == "PAVIA" & year == 1200
replace chandlerpop = 22 if city_jjk == "PAVIA" & year == 1700
replace remipop = chandlerpop if city_jjk == "PAVIA" & year >= 800 & year <= 1700

** 43 PRAHA **

replace chandlerpop = 15 if city_jjk == "PRAHA" & year == 1000
replace chandlerpop = 18.5 if city_jjk == "PRAHA" & year == 1100
replace chandlerpop = 22 if city_jjk == "PRAHA" & year == 1200
replace chandlerpop = 48 if city_jjk == "PRAHA" & year == 1700
replace chandlerpop = 10 if city_jjk == "PRAHA" & year == 900
* The range copy on the last line also covers year 900, where chandlerpop was
* just set to 10, so the direct assignment on the next line gives the same value.
replace remipop = 10 if city_jjk == "PRAHA" & year == 900
replace remipop = chandlerpop if city_jjk == "PRAHA" & year >= 800 & year <= 1700

** 44 ROMA **

replace chandlerpop = 35 if city_jjk == "ROMA" & year == 1100
replace chandlerpop = 35 if city_jjk == "ROMA" & year == 1200
replace chandlerpop = 138 if city_jjk == "ROMA" & year == 1700

** 45 TOULOUSE **

replace chandlerpop = 11.5 if city_jjk == "TOULOUSE" & year == 1100
replace chandlerpop = 20 if city_jjk == "TOULOUSE" & year == 1200
replace chandlerpop = 38 if city_jjk == "TOULOUSE" & year == 1700
replace remipop = chandlerpop if city_jjk == "TOULOUSE" & year >= 1100 & year <= 1700

** 46 VERONA **

replace chandlerpop = 27 if city_jjk == "VERONA" & year == 1100
replace chandlerpop = 33 if city_jjk == "VERONA" & year == 1200
replace chandlerpop = 50 if city_jjk == "VERONA" & year == 1700
replace chandlerpop = 49 if city_jjk == "VERONA" & year == 1750
replace chandlerpop = 55 if city_jjk == "VERONA" & year == 1800
replace chandlerpop = 52 if city_jjk == "VERONA" & year == 1850
replace remipop = chandlerpop if city_jjk == "VERONA" & year >= 1100 & year <= 1850

** 47 LUEBECK (LUBECK) **

replace chandlerpop = . if city_jjk == "LUEBECK" & year == 1100
replace chandlerpop = 6 if city_jjk == "LUEBECK" & year == 1200
replace chandlerpop = 21 if city_jjk == "LUEBECK" & year == 1700

** 48 MESSINA **

replace chandlerpop = 25 if city_jjk == "MESSINA" & year == 1100
replace chandlerpop = 35 if city_jjk == "MESSINA" & year == 1200
replace chandlerpop = 45 if city_jjk == "MESSINA" & year == 1700

** 49 AUGSBURG **

replace chandlerpop = . if city_jjk == "AUGSBURG" & year == 1100
replace chandlerpop = 12 if city_jjk == "AUGSBURG" & year == 1200
replace chandlerpop = 26 if city_jjk == "AUGSBURG" & year == 1700
replace chandlerpop = 25 if city_jjk == "AUGSBURG" & year == 1300
* The copy starts at 1400, so the 1300 value set just above stays in chandlerpop only.
replace remipop = chandlerpop if city_jjk == "AUGSBURG" & year >= 1400 & year <= 1700
* Manual corrections, cities 50 to 63 (LEON to LAON).
* Each line overwrites chandlerpop or remipop for one city, selected by name only.
* A line of the form "replace remipop = chandlerpop if ..." copies whatever
* chandlerpop holds for the selected rows, missing values included.

** 50 LEON **

* NO estimate is provided in Chandler.

** 51 MAINZ **

replace chandlerpop = 25 if city_jjk == "MAINZ" & year == 1100
replace chandlerpop = 24 if city_jjk == "MAINZ" & year == 1700

** 52 SPEYER (SPIRES) **

replace chandlerpop = 30 if city_jjk == "SPEYER" & year == 1100
replace chandlerpop = 13 if city_jjk == "SPEYER" & year == 1500

** 53 TOURS ** 

replace chandlerpop = 20 if city_jjk == "TOURS" & year == 1100
replace chandlerpop = 33 if city_jjk == "TOURS" & year == 1700

** 54 VALENCIENNES **

* No chandlerpop value is set here; the 1400 value already held by chandlerpop is copied.
replace remipop = chandlerpop if city_jjk == "VALENCIENNES" & year == 1400

** 55 VALLADOLID **

replace chandlerpop = 9 if city_jjk == "VALLADOLID" & year == 1100
replace chandlerpop = 18 if city_jjk == "VALLADOLID" & year == 1700

** 56 BRESCIA **

replace chandlerpop = 18 if city_jjk == "BRESCIA" & year == 1100
replace chandlerpop = 22 if city_jjk == "BRESCIA" & year == 1700
replace chandlerpop = 24 if city_jjk == "BRESCIA" & year == 1750
replace chandlerpop = 27 if city_jjk == "BRESCIA" & year == 1800
replace chandlerpop = 17 if city_jjk == "BRESCIA" & year == 1850
* No year condition: remipop is replaced by chandlerpop in every BRESCIA row.
replace remipop = chandlerpop if city_jjk == "BRESCIA" 

** 57 PARMA **

replace chandlerpop = 20 if city_jjk == "PARMA" & year == 1300
replace chandlerpop = 35 if city_jjk == "PARMA" & year == 1700
replace chandlerpop = 34 if city_jjk == "PARMA" & year == 1750
replace chandlerpop = 33 if city_jjk == "PARMA" & year == 1800
replace chandlerpop = 41 if city_jjk == "PARMA" & year == 1850
* Applies to every PARMA row from 1300 onwards.
replace remipop = chandlerpop if city_jjk == "PARMA" & year >= 1300

** 58 VICENZA **

replace chandlerpop = 17 if city_jjk == "VICENZA" & year == 1700

** 59 AACHEN **

replace chandlerpop = 16 if city_jjk == "AACHEN" & year == 1600
replace chandlerpop = 15 if city_jjk == "AACHEN" & year == 1700

** 60 AMIENS **

replace chandlerpop = 32 if city_jjk == "AMIENS" & year == 1700

** 61 BURGOS **

replace chandlerpop = 15 if city_jjk == "BURGOS" & year == 1100
replace chandlerpop = 18 if city_jjk == "BURGOS" & year == 1200
replace chandlerpop = 7 if city_jjk == "BURGOS" & year == 1500
replace chandlerpop = 9 if city_jjk == "BURGOS" & year == 1700

** 62 AREZZO ** 

* No estimate is provided in Chandler.

** 63 LAON **

* No estimate is provided in Chandler.

** 64 LEUVEN 

replace chandlerpop = . if city_jjk == "LEUVEN" & year == 1100
replace chandlerpop = 7 if city_jjk == "LEUVEN" & year == 1200
replace chandlerpop = 14 if city_jjk == "LEUVEN" & year == 1300
replace chandlerpop = 21 if city_jjk == "LEUVEN" & year == 1400
replace chandlerpop = 19 if city_jjk == "LEUVEN" & year == 1500
replace chandlerpop = 9 if city_jjk == "LEUVEN" & year == 1600
replace chandlerpop = 14 if city_jjk == "LEUVEN" & year == 1700
replace remipop = chandlerpop if city_jjk == "LEUVEN" & year >= 1200 & year <= 1700

** 65 LYON **

replace chandlerpop = . if city_jjk == "LYON" & year == 1100
replace chandlerpop = 97 if city_jjk == "LYON" & year == 1700

** 66 MEDINA-DEL-CAMPO **

* Little data in Chandler.

** 67 PIACENZA **

replace chandlerpop = 26 if city_jjk == "PIACENZA" & year == 1500
replace chandlerpop = 31 if city_jjk == "PIACENZA" & year == 1700
replace remipop = chandlerpop if city_jjk == "PIACENZA" & year >= 1200 & year <= 1700

** 68 TOURNAI **

* No estimate is provided in Chandler.

** 69 WIEN **

replace chandlerpop = . if city_jjk == "WIEN" & year == 1100
replace chandlerpop = 105 if city_jjk == "WIEN" & year == 1700

** 70 WORMS **

replace chandlerpop = 7 if city_jjk == "WORMS" & year == 1500
replace chandlerpop = 2 if city_jjk == "WORMS" & year == 1700

** 71 ALMERIA **

replace chandlerpop = 25 if city_jjk == "ALMERIA" & year == 1500
replace chandlerpop = 5.5 if city_jjk == "ALMERIA" & year == 1700
replace remipop = chandlerpop if city_jjk == "ALMERIA" & year >= 1300 & year <= 1500

** 72 BRUXELLES **

replace chandlerpop = 70 if city_jjk == "BRUXELLES" & year == 1700

** 73 MODENA **

* No estimate is provided in Bairoch.

** 74 NIMES **

replace chandlerpop = 18 if city_jjk == "NIMES" & year == 1700

** 75 ANTWERPEN **

replace chandlerpop = 19 if city_jjk == "ANTWERPEN" & year == 1400
replace chandlerpop = 67 if city_jjk == "ANTWERPEN" & year == 1700
replace remipop = chandlerpop if city_jjk == "ANTWERPEN" & year >= 1400 & year <= 1700

** 76 DIJON **

replace chandlerpop = 18 if city_jjk == "DIJON" & year == 1320
replace chandlerpop = 19 if city_jjk == "DIJON" & year == 1400
replace chandlerpop = 34 if city_jjk == "DIJON" & year == 1700
replace remipop = chandlerpop if city_jjk == "DIJON" & year >= 1300 & year <= 1700

** 77 PALMA **

replace chandlerpop = 27 if city_jjk == "PALMA" & year == 1100
replace chandlerpop = 15 if city_jjk == "PALMA" & year == 1500
replace chandlerpop = 26 if city_jjk == "PALMA" & year == 1700
replace remipop = chandlerpop if city_jjk == "PALMA" & year >= 1100 & year <= 1700

** 78 ALESSANDRIA **

replace chandlerpop = 9 if city_jjk == "ALESSANDRIA" & year == 1500
replace chandlerpop = 12 if city_jjk == "ALESSANDRIA" & year == 1700

** 79 ASTI **

* No estimate is provided in Chandler.

** 80 BEAUVAIS **

* No estimate is provided in Chandler.

** 81 BEZIERS **

replace chandlerpop = 14 if city_jjk == "BEZIERS" & year == 1300
replace chandlerpop = 4 if city_jjk == "BEZIERS" & year == 1400
replace chandlerpop = 18 if city_jjk == "BEZIERS" & year == 1700

** 82 BOURGES **

replace chandlerpop = 10 if city_jjk == "BOURGES" & year == 1100
replace chandlerpop = 15 if city_jjk == "BOURGES" & year == 1700
replace remipop = 10 if city_jjk == "BOURGES" & year == 1100
replace remipop = 13 if city_jjk == "BOURGES" & year == 1200

** 83 JAEN **

replace chandlerpop = 19 if city_jjk == "JAEN" & year == 1700

** 84 LUCCA **

replace chandlerpop = 23 if city_jjk == "LUCCA" & year == 1320
replace chandlerpop = 22 if city_jjk == "LUCCA" & year == 1700
replace remipop = chandlerpop if city_jjk == "LUCCA" & year >= 1100 & year <= 1700

** 85 MUENSTER **

* No estimate is provided in Chandler.

** 86 AVERSA **

* No estimate is provided in Chandler.

** 87 BARBASTRO **

* No estimate is provided in Chandler.

** 88 BAYONNE **

* No estimate is provided in Chandler.

** 89 CAEN **

replace chandlerpop = 20 if city_jjk == "CAEN" & year == 1100 
replace chandlerpop = 35 if city_jjk == "CAEN" & year == 1700
replace remipop = chandlerpop if city_jjk == "CAEN" & year >= 1100 & year <= 1700

** 90 ECIJA **

replace chandlerpop = 10 if city_jjk == "ECIJA" & year == 1700 

** 91 MAGDEBURG **

replace chandlerpop = 12 if city_jjk == "MAGDEBURG" & year == 1700

** 92 MURCIA **

replace chandlerpop = 12 if city_jjk == "MURCIA" & year == 1500 
replace chandlerpop = 25 if city_jjk == "MURCIA" & year == 1700

** 93 POITIERS **

replace chandlerpop = 20 if city_jjk == "POITIERS" & year == 1100
replace chandlerpop = 18 if city_jjk == "POITIERS" & year == 1700

** 94 PRATO **

* No estimate is provided in Chandler

** 95 SALAMANCA **

replace chandlerpop = 12 if city_jjk == "SALAMANCA" & year == 1700

** 96 STRASBOURG **

replace chandlerpop = 10 if city_jjk == "STRASBOURG" & year == 1100
replace chandlerpop = 10 if city_jjk == "STRASBOURG" & year == 1200
replace chandlerpop = 27 if city_jjk == "STRASBOURG" & year == 1700

** 97 WINCHESTER **

* No estimate is provided in Chandler

** 98 ZARAGOZA **

replace chandlerpop = 20 if city_jjk == "ZARAGOZA" & year == 1100
replace chandlerpop = 29 if city_jjk == "ZARAGOZA" & year == 1700
replace remipop = chandlerpop if city_jjk == "ZARAGOZA" & year >= 1100 & year <= 1700

** 99 BERGAMO **

* No estimate is provided in Chandler.

** 100 FORLI **

* No estimate is provided in Chandler.

* We also modify the data for various cities that are not among the top 100 cities in 1300 but for which the Bairoch data is likely unreliable based on a comparison of the different sources we used.
* As above, values are in thousands and cities are matched on city_jjk only.

** AVIGNON & CARPENTRAS **

* Bairoch uses 6 for 1300, which is too low. Avignon reached 35 as soon as it became the capital of Christianity in 1309. We use the 1309 estimate from Chandler.
replace chandlerpop = 35 if city_jjk == "AVIGNON" & year == 1300
replace chandlerpop = 20 if city_jjk == "AVIGNON" & year == 1400
replace chandlerpop = 18 if city_jjk == "AVIGNON" & year == 1500
replace chandlerpop = 21 if city_jjk == "AVIGNON" & year == 1600
replace chandlerpop = 23 if city_jjk == "AVIGNON" & year == 1700
replace chandlerpop = 24 if city_jjk == "AVIGNON" & year == 1750
replace chandlerpop = 19 if city_jjk == "AVIGNON" & year == 1800
replace chandlerpop = 32 if city_jjk == "AVIGNON" & year == 1850
* The copy below has no year restriction: remipop is overwritten with chandlerpop for every AVIGNON row.
replace remipop = chandlerpop if city_jjk == "AVIGNON"
* CARPENTRAS is set to 1 (thousand) in 1300 in both pop and remipop.
* NOTE(review): pop is dropped at the start of the cleaning step that follows, so only the remipop line has a lasting effect.
replace pop = 1 if city_jjk == "CARPENTRAS" & year == 1300
replace remipop = 1 if city_jjk == "CARPENTRAS" & year == 1300

** CASTELLON **

* Bairoch says 7 in 1300.
* But Christakos et al (2008) says 2 for pre-plague. We use the average, 4.5.
replace remipop = 4.5 if city_jjk == "CASTELLON-DE-LA-PLANA" & year == 1300

** YORK **

* Bairoch says 8 in 1300.
* This is clearly under-estimated. We use Campbell for 1300.
replace remipop = 22.7 if city_jjk == "YORK" & year == 1300

* Cleaning step: remipop becomes the main population series (pop), and for each benchmark year
* we build a city-level variable popYYYY holding the city's population in that year on every row of the city.
drop pop
rename remipop pop
label variable pop "Population in thousands (various sources)" 
label variable chandlerpop "Population in thousands (source: Chandler)" 
sort countrycity year
* For each benchmark year: isolate pop in that year in a scratch variable, then spread it
* to all rows of the same city with egen max() (the scratch variable is missing in all other years).
foreach yr of numlist 1000 1100 1200 1300 1400 1500 1600 1700 1750 1800 1850 {
	tempvar pop_in_yr
	generate `pop_in_yr' = pop if year == `yr'
	bysort countrycity: egen pop`yr' = max(`pop_in_yr')
	drop `pop_in_yr'
	label variable pop`yr' "Population in thousands (various sources) in `yr'"
}
sort countryname city_jjk
save finaljjk2, replace 
* Quick inspection of the saved file: size, countries, coordinates, and cities without coordinates.
count
tabulate countryname 
codebook longitude latitude
tabulate city_jjk countryname if longitude == .

use finaljjk2, clear
* We modify chandlerpop for the 139 cities of the main sample in order to test for the robustness of our results when using Chandler's data instead of Bairoch + Chandler.
* Values are in thousands. Only the cities listed first receive values here; the cities for which
* Chandler does not provide enough data are listed at the end of this step and are left untouched.

* ANGERS (no data for 1400)
replace chandlerpop = 22 if city_jjk == "ANGERS" & year == 1300
replace chandlerpop = 20 if city_jjk == "ANGERS" & year == 1600

* ARLES (We use 1450 for 1400, no estimate for 1600)
replace chandlerpop = 17 if city_jjk == "ARLES" & year == 1300
replace chandlerpop = 6 if city_jjk == "ARLES" & year == 1400

* CATANIA
replace chandlerpop = 10 if city_jjk == "CATANIA" & year == 1300
replace chandlerpop = 18 if city_jjk == "CATANIA" & year == 1600

* EDINBURGH (the 1600 value is the average of 26 and 35)
replace chandlerpop = 8 if city_jjk == "EDINBURGH" & year == 1300
replace chandlerpop = 10 if city_jjk == "EDINBURGH" & year == 1400
replace chandlerpop = (26+35)/2 if city_jjk == "EDINBURGH" & year == 1600

* HAMBURG
replace chandlerpop = 8 if city_jjk == "HAMBURG" & year == 1300
replace chandlerpop = 22 if city_jjk == "HAMBURG" & year == 1400
replace chandlerpop = 22 if city_jjk == "HAMBURG" & year == 1600

* MUENCHEN
replace chandlerpop = 13 if city_jjk == "MUENCHEN" & year == 1300
replace chandlerpop = 20 if city_jjk == "MUENCHEN" & year == 1600

* NORWICH
replace chandlerpop = 24 if city_jjk == "NORWICH" & year == 1300
replace chandlerpop = 7 if city_jjk == "NORWICH" & year == 1400
replace chandlerpop = 16 if city_jjk == "NORWICH" & year == 1600

* PISA (the 1600 value is the average of 10 and 13)
replace chandlerpop = 25 if city_jjk == "PISA" & year == 1300
replace chandlerpop = (10+13)/2 if city_jjk == "PISA" & year == 1600

* REIMS
replace chandlerpop = 15 if city_jjk == "REIMS" & year == 1300
replace chandlerpop = 10 if city_jjk == "REIMS" & year == 1400
replace chandlerpop = 18 if city_jjk == "REIMS" & year == 1600

* TOULON
replace chandlerpop = 3 if city_jjk == "TOULON" & year == 1300
replace chandlerpop = 1 if city_jjk == "TOULON" & year == 1400
replace chandlerpop = 18 if city_jjk == "TOULON" & year == 1600

* TRIER (the 1600 value is the average of 10 and 8)
replace chandlerpop = 15 if city_jjk == "TRIER" & year == 1300
replace chandlerpop = 10 if city_jjk == "TRIER" & year == 1400
replace chandlerpop = (10+8)/2 if city_jjk == "TRIER" & year == 1600

* ULM (the 1600 value is the average of 12 and 15)
replace chandlerpop = 4 if city_jjk == "ULM" & year == 1300 
replace chandlerpop = 10 if city_jjk == "ULM" & year == 1400
replace chandlerpop = (12+15)/2 if city_jjk == "ULM" & year == 1600

* Not enough data in Chandler (no change made) for:
*   ABERDEEN, AIX, ALBI, AMIENS, ANNECY, ANTWERPEN, BAEZA, BATH, BRAGA, BRAUNSCHWEIG, BREMEN,
*   BRISTOL, BRUXELLES, CAMBRIDGE, CANTERBURY, CARCASSONNE, CARPENTRAS, CASTELLON-DE-LA-PLANA,
*   CHESTER, COIMBRA, COLCHESTER, CORK, COVENTRY, DOUAI, DROGHEDA, FRANKFURTANDERODER,
*   GLOUCESTER, GRENOBLE, HALBERSTADT, HANNOVER, KIEL, KILKENNY, KONSTANZ, LAON, LEICESTER,
*   LERIDA, LIMOGES, LINCOLN, LODEVE, LUENEBURG, MADRID, MAINZ, MONTAUBAN, MUENSTER, MURCIA,
*   NEW-ROSS, NOTTINGHAM, NUERNBERG, ORVIETO, OSLO, OSNABRUECK, PADERBORN, PAMPLONA, PASSAU,
*   PERPIGNAN, PISTOIA, PLYMOUTH, POITIERS, PRATO, REGENSBURG, RIMINI, ROSTOCK, SAN-GIMIGNANO,
*   SANTAREM, SILVES, ST-FLOUR, STOCKHOLM, STRALSUND, TARBES, TARRAGONA, TORINO, TRAPANI,
*   WARWICK, WATERFORD, WINCHESTER, WISMAR, WUERZBURG, YORK, ZUERICH.

* City-level Chandler population in 1300, repeated on every row of the city.
tempvar chandler_in_1300
generate `chandler_in_1300' = chandlerpop if year == 1300
bysort countrycity: egen pop_c_1300 = max(`chandler_in_1300')
drop `chandler_in_1300'
label variable pop_c_1300 "Population in thousands (Chandler) in 1300"
sort countryname city_jjk
save finaljjk2, replace

* Reload the city panel, re-sort it by country and city, save it again, and inspect
* the identifiers, the coordinates and the years covered.
* NOTE(review): the original header of this step announced that 5 cities far from continental Europe
* (in Norway or in the Azores) are dropped, but no drop command appears in this step. Further down,
* PONTA-DELGADA (Azores) is dropped just before the merge with the extrapolated mortality data - confirm where
* the remaining cities are removed.
use finaljjk2, clear
sort countryname city_jjk
save finaljjk2, replace
codebook city_id longitude latitude
tabulate city_jjk countryname if longitude == .
tabulate year
codebook city_jjk city_id countryname

*******************************
* BLACK DEATH MORTALITY RATES *
*******************************

* We add to the main data set the information on the Black Death.
* The main source is Christakos et al (2005).
* The data set that we recreated is imported from the Excel file "death_rates_vFINAL"
* (earlier notes refer to it as "death_rates_v8").
clear
import excel "death_rates_vFINAL", sheet("Sheet1") firstrow
replace countryname = "Yugoslavia" if countryname == "Croatia"
replace countryname = "Spain" if countryname == "Gibraltar"
gen city_jjk = upper(Place)
order city_jjk
drop if place_type == "r"
drop place_type
* These cities are already in the Bairoch data set.
replace city_jjk = "SIRACUSA" if city_jjk == "SYRACUSE"
replace city_jjk = "VICH" if city_jjk == "VIC"
replace city_jjk = "ST-DENIS" if city_jjk == "SAINT DENIS"
* mortality: when we obtain a range of estimates or the mortality rate is described in words
* mortality2: when we obtain a specific estimate of the mortality rate (either the estimate itself or an average of the range)
* we create a new mortality variable that uses as much information as possible
* to do so, we need to make a few assumptions regarding the mortality rates associated with a few literary interpretations of the mortality rate
* The sources are used in this order of priority, each one only filling rows still missing:
*   1. numeric estimate (mortality2), 2. desertion rate, 3. clergy mortality rate, 4. verbal description.
gen mortality_final = mortality2
gen mortality_type = "Number" if mortality2 != .
replace mortality_type = "Desertion" if mortality_final == . & desertion != .
replace mortality_final = desertion/1.2 if mortality_final == .
* We use 1.2 to reconstruct the mortality rates from the desertion rates (see the data appendix for details)
replace mortality_type = "Clergy" if mortality_final == . & clergy != .
replace mortality_final = clergy/1.08 if mortality_final == .
* We use 1.08 to reconstruct the mortality rates from the clergy mortality rates (see the data appendix for details)
sum mortality_final if mortality_final > 0, d 
* Verbal descriptions are converted to numbers ONLY for rows that are still missing at this point.
* (Previously the conversion was applied to every row with a matching description, which could overwrite
* a value obtained from a number, a desertion rate or a clergy rate while mortality_type kept the old source.)
* The list of descriptions is split in two because inlist() accepts a limited number of string arguments.
tempvar from_text
gen byte `from_text' = mortality_final == . & (inlist(mortality, "completely depopulated", "decimated", "close to being depopulated", "very hard hit", "ferocious", "highly depopulated", "hardly hit", "high") | inlist(mortality, "high clergy mortality", "moderate", "low", "minimal", "partially spared", "spared", "escaped"))
replace mortality_type = "Description" if `from_text'
replace mortality_final = 80 if `from_text' & inlist(mortality, "completely depopulated", "decimated", "close to being depopulated")
replace mortality_final = 66 if `from_text' & inlist(mortality, "very hard hit", "ferocious", "highly depopulated")
replace mortality_final = 50 if `from_text' & inlist(mortality, "hardly hit", "high", "high clergy mortality")
replace mortality_final = 25 if `from_text' & mortality == "moderate"
replace mortality_final = 20 if `from_text' & mortality == "low"
replace mortality_final = 10 if `from_text' & inlist(mortality, "minimal", "partially spared")
replace mortality_final = 5 if `from_text' & inlist(mortality, "spared", "escaped")
drop `from_text'
tab mortality_type if mortality_final != ., m
count if mortality_final != .
* We have 263 localities for which we have the mortality rate.
* However, in the analysis, we will only focus on the localities that were cities in 1300.
label var city_jjk "City Name"
label var start_month "Month of the start of the epidemic"
label var start_year "Year of the start of the epidemic"
label var end_month "Month of the end of the epidemic"
label var end_year "Year of the end of the epidemic"
* From here on: mortality_info = original text, mortality = final numeric rate, mortality_raw = numeric estimate as reported.
ren mortality mortality_info
label var mortality_info "Mortality: Additional information"
label var mortality_type "Mortality: Type of information"
label var desertion "Desertion rate (%)"
label var clergy "Clergy mortality rate (%)"
ren mortality_final mortality
label var mortality "Mortality rate (%)"
ren mortality2 mortality_raw
label var mortality_raw "Mortality rate (%): Raw data"
drop remi Place preplaguepop Marknotes source Source
sort countryname city_jjk
save deathrates, replace
count if mortality != .
* 264 (263 + Sion)

* We merge the Black Death information (deathrates) into the city panel on country and city name.
* Observations that only exist in deathrates (_merge == 2) are discarded.
use finaljjk2, clear
codebook countryname city_jjk
sort countryname city_jjk
merge countryname city_jjk using deathrates, update
tabulate _merge
codebook city_jjk if _merge == 3 & mortality != .
drop if _merge == 2
drop _merge
* These are extra cities for which we found the mortality rate in other sources listed in the text.
* The two lists below are read in parallel (k-th city, k-th rate); for each city the rate is written
* to both mortality and mortality_raw and the type is set to "Number". Cities are matched on city_jjk only.
local extra_cities NAPOLI CREMONA ROMA AACHEN TOURNAI APT CHAMBERY AREZZO BASEL AQUILA
local extra_rates  63     50      50   33.33  30      52  59       60     50    66.6
forvalues k = 1/10 {
	local this_city : word `k' of `extra_cities'
	local this_rate : word `k' of `extra_rates'
	replace mortality = `this_rate' if city_jjk == "`this_city'"
	replace mortality_raw = `this_rate' if city_jjk == "`this_city'"
	replace mortality_type = "Number" if city_jjk == "`this_city'"
}
save finaljjk2, replace
tabulate year

* We make final changes to mortality and the start date based on external sources.
* We also modify the data for Gerona/Gijon in Spain, which are different cities:
* GERONA receives the epidemic dates and a mortality rate of 66 ("2/3"), GIJON is reset to no information.
use finaljjk2, clear
local gerona countrycity == "SpainGERONA"
local gijon  countrycity == "SpainGIJON"

* GERONA: epidemic from May to August 1348, mortality of two thirds, recorded as a numeric estimate.
replace start_month = 5 if `gerona'
replace start_year = 1348 if `gerona'
replace end_month = 8 if `gerona'
replace end_year = 1348 if `gerona'
replace mortality_info = "2/3" if `gerona'
replace mortality_type = "Number" if `gerona'
replace mortality = 66 if `gerona'
replace mortality_raw = 66 if `gerona'
replace clergy = . if `gerona'
replace desertion = . if `gerona'

* GIJON: all Black Death information is blanked out.
foreach v of varlist start_month start_year end_month end_year mortality mortality_raw clergy desertion {
	replace `v' = . if `gijon'
}
replace mortality_info = "" if `gijon'
replace mortality_type = "" if `gijon'

* Flag the cities that have a population of at least 1 (thousand) in 1300; the flag is repeated on all rows of the city.
* NOTE(review): the flag is spread within city_jjk, not within countrycity, so two cities with the same name
* in different countries would share the same flag - confirm that city names are unique across countries.
tempvar in_1300
generate `in_1300' = (pop >= 1 & pop != .) if year == 1300
bysort city_jjk: egen sample = max(`in_1300')
drop `in_1300'
label variable sample "Sample of cities that existed in 1300 (> 1,000 inh.)"
save finaljjk2, replace

* We drop some additional countries at the edge of our sample where we do not have enough population data. 
use finaljjk2, clear
* mortality_yn: 1 if the city has a mortality rate; mortality_yn_c: 1 if any city of the country has one.
generate mortality_yn = (mortality != .)
tabulate mortality_yn sample if year == 1300
bysort countryname: egen mortality_yn_c = max(mortality_yn)
tabulate countryname if mortality_yn_c == 0
* Countries without any mortality data: Denmark, Poland, Slovakia, The Netherlands
label variable mortality_yn_c "Data on mortality in country" 
* Poland and Slovakia are dropped.
* We keep Denmark and the Netherlands because they are surrounded by countries with data.
drop if inlist(countryname, "Poland", "Slovakia")
tabulate countryname, missing
* 13 countries
label variable mortality_yn "Data on mortality"
drop mortality_yn_c
* Main estimation sample: cities that existed in 1300 and have a mortality rate.
generate samplemort = (sample == 1 & mortality_yn == 1)
label variable samplemort "Samples of cities in 1300 with mortality data" 
tabulate samplemort if year == 1300
* 165
save finaljjk2, replace

* For every variable from pop to pop1850 (in data set order) we create:
*   l<var>     : log of the variable
*   <var>_05   : the variable with missing values and zeros set to 0.5 (a missing population indicates fewer than 1K inhabitants, so we use 0.5K)
*   l<var>_05  : log of <var>_05
* The list of variables is fixed when the loop starts, so the new variables are not looped over.
use finaljjk2, clear
foreach v of varlist pop-pop1850 {
	generate l`v' = log(`v')
	label variable l`v' "Log of variable `v'"
	generate `v'_05 = cond(`v' == . | `v' == 0, 0.5, `v')
	label variable `v'_05 "Replacing 0 by 0.5 for variable `v'"
	generate l`v'_05 = log(`v'_05)
	label variable l`v'_05 "Log of variable `v'_05"
}
save finaljjk2, replace

* Variables measuring the percentage change in population
* All lags are positional (pop[_n-k] within city after sorting by countrycity and year), so they assume
* that every city has exactly one row per benchmark year.
* NOTE(review): the offsets used below imply the sequence ..., 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1750, 1800, 1850 - confirm with the tab year at the end.
use finaljjk2, clear
sort countrycity year
* Centennial change
bysort countrycity: gen changepop=(pop-pop[_n-1])/pop[_n-1]*100
bysort countrycity: gen changepop_lag=(pop[_n-1]-pop[_n-2])/pop[_n-2]*100
label var changepop "Pct change between year t-1 and year t"
label var changepop_lag "Pct change between year t-2 and year t-1"
* Century-specific change (non-missing only on the row of the stated year)
bysort countrycity: gen changepop1100=(pop-pop[_n-1])/pop[_n-1]*100 if year == 1100
bysort countrycity: gen changepop1200=(pop-pop[_n-1])/pop[_n-1]*100 if year == 1200
bysort countrycity: gen changepop1300=(pop-pop[_n-1])/pop[_n-1]*100 if year == 1300
bysort countrycity: gen changepop1400=(pop-pop[_n-1])/pop[_n-1]*100 if year == 1400
bysort countrycity: gen changepop1500=(pop-pop[_n-1])/pop[_n-1]*100 if year == 1500
bysort countrycity: gen changepop1600=(pop-pop[_n-1])/pop[_n-1]*100 if year == 1600
bysort countrycity: gen changepop1700=(pop-pop[_n-1])/pop[_n-1]*100 if year == 1700
bysort countrycity: gen changepop1750=(pop-pop[_n-1])/pop[_n-1]*100 if year == 1750
bysort countrycity: gen changepop1800=(pop-pop[_n-1])/pop[_n-1]*100 if year == 1800
bysort countrycity: gen changepop1850=(pop-pop[_n-1])/pop[_n-1]*100 if year == 1850
* The list must contain every year generated above (1600 was previously missing and 1500 listed twice,
* which left changepop1600 without a label).
foreach X in 1100 1200 1300 1400 1500 1600 1700 1750 1800 1850 {
label var changepop`X' "Pct change in year `X' relative to the previous year"
}
* Long-difference 
* changepop1300YYYY is stored on the row of year YYYY; the offset is the number of rows back to 1300.
bysort countrycity: gen changepop13001500=(pop-pop[_n-2])/pop[_n-2]*100 if year == 1500
bysort countrycity: gen changepop13001600=(pop-pop[_n-3])/pop[_n-3]*100 if year == 1600
bysort countrycity: gen changepop13001700=(pop-pop[_n-4])/pop[_n-4]*100 if year == 1700
bysort countrycity: gen changepop13001750=(pop-pop[_n-5])/pop[_n-5]*100 if year == 1750
bysort countrycity: gen changepop13001800=(pop-pop[_n-6])/pop[_n-6]*100 if year == 1800
bysort countrycity: gen changepop13001850=(pop-pop[_n-7])/pop[_n-7]*100 if year == 1850
bysort countrycity: gen changepop11001300=(pop-pop[_n-2])/pop[_n-2]*100 if year == 1300
bysort countrycity: gen changepop14001600=(pop-pop[_n-2])/pop[_n-2]*100 if year == 1600
* The 1200-1300 change is stored on the 1600 row (3 and 4 rows back from 1600), i.e. on the same row as changepop14001600.
bysort countrycity: gen changepop12001300=(pop[_n-3]-pop[_n-4])/pop[_n-4]*100 if year == 1600
* Same correction as above: 1600 replaces the duplicated 1500 so that changepop13001600 is labelled.
foreach X in 1500 1600 1700 1750 1800 1850 {
label var changepop1300`X' "Pct change in year `X' relative to 1300"
}
label var changepop11001300 "Pct change in 1100-1300"
label var changepop14001600 "Pct change in 1400-1600"
label var changepop12001300 "Pct change in 1200-1300"
* Lags (stored on the 1400 row)
bysort countrycity: gen changepop1400_lag1 =(pop[_n-1]-pop[_n-2])/pop[_n-2]*100 if year == 1400
bysort countrycity: gen changepop1400_lag2 =(pop[_n-2]-pop[_n-3])/pop[_n-3]*100 if year == 1400
label var changepop1400_lag1 "Pct change in 1200-1300"
label var changepop1400_lag2 "Pct change in 1100-1200"
save finaljjk3, replace
use finaljjk3, clear
tab year

* Representativity of sample *
* The main sample includes 16 countries of Western Europe. 
* This includes Denmark and The Netherlands for which we do not have any mortality estimate,
* so both are excluded from this check.
* We compute the share of the 1300 urban population (cities of at least 1K inhabitants)
* that lives in cities with a mortality estimate.
use finaljjk3, clear
drop if inlist(countryname, "Denmark", "The Netherlands")
keep if year == 1300 & pop1300 >= 1 & pop1300 != .
* Total population by mortality-data status, then one row with pop0 (no data) and pop1 (data).
collapse (sum) pop, by(mortality_yn)
generate one = 1
reshape wide pop, i(one) j(mortality_yn)
generate pop_share = pop1/(pop0+pop1)*100
summarize pop_share 
* About 60%. 

***********************************
*** MODERN POPULATION ESTIMATES ***
***********************************

* Source: We checked the Wikipedia webpage of each city. 
* We obtained two estimates c. 2015, for the city itself, and for the whole metro area. 
* However, Wikipedia is not clear how the metro area is defined. 
* pop2015max / pop2015min are the larger / smaller of the two estimates, converted to thousands.
clear
import excel "modern_pop_165 (Sam1) (1).xlsx", sheet("Sheet1") firstrow clear
destring pop_circa_2015_city pop_circa_2015_metro, replace
egen pop2015max = rowmax(pop_circa_2015_city pop_circa_2015_metro)
egen pop2015min = rowmin(pop_circa_2015_city pop_circa_2015_metro)
keep city_id year pop2015m*
* Convert from inhabitants to thousands, the unit used for the historical population variables.
foreach modern of varlist pop2015m* {
	replace `modern' = `modern'/1000
}
* The year of the modern estimate is renamed so that it does not clash with the panel variable year.
rename year year_for_2015
sort city_id
save modern_pop, replace
count

* We add the modern (c. 2015) population estimates to the city panel and compute the
* percentage change in population between 1300 and c. 2015.
use finaljjk3, clear
sort city_id
merge city_id using modern_pop
tab _m
drop _m
sort countrycity year
* The change is stored on the 1850 row; pop[_n-7] is the row 7 positions before 1850 within the city,
* the same offset as the one used for the 1300-1850 change.
bysort countrycity: gen changepop13002015max=(pop2015max-pop[_n-7])/pop[_n-7]*100 if year == 1850
bysort countrycity: gen changepop13002015min=(pop2015min-pop[_n-7])/pop[_n-7]*100 if year == 1850
label var pop2015max "City pop estimate c. 2015 (metro area; source: Wikipedia)"
label var pop2015min "City pop estimate c. 2015 (city; source: Wikipedia)"
* These variables are percentage changes, not log changes, and are labelled accordingly.
label var changepop13002015max "Pct change city pop 1300-2015 (metro area; source: Wikipedia)"
label var changepop13002015min "Pct change city pop 1300-2015 (city; source: Wikipedia)"
save finaljjk3, replace

********************************
* EXTRAPOLATED MORTALITY RATES *
********************************

** These are several data sets that we consider to construct spatially extrapolated mortality rates **

* Base 1 (mort_for_extrap): cities of the panel, observed in 1300, that have a mortality rate,
* with their coordinates.
use finaljjk3, clear
keep if year == 1300 & mortality != .
count
keep countryname city_jjk mortality mortality_raw longitude latitude
save mort_for_extrap, replace
count
* 274 cities including some additional cities not in Christakos. 
count if mortality_raw != .
* 177 cities with raw mortality estimates.

* Base 2 (mort_for_extrap_christakos): localities of the deathrates file that have a mortality rate, GIJON excluded.
use deathrates, clear
keep countryname city_jjk mortality mortality_raw 
keep if mortality != . & city_jjk != "GIJON"
sort countryname city_jjk
save mort_for_extrap_christakos, replace
count
* 263 cities that appear in Christakos.
count if mortality_raw != .
* 166 cities with raw mortality estimates.

** These are the data sets with the extrapolated mortality rates **

* These were created by Noel Johnson using a GIS softaware. 

* We only use the 263 cities.
use mort_for_extrap, clear
sort countryname city 
merge countryname city using mort_for_extrap_christakos
tab _m
tab city_jjk if _m == 1
* ok
drop _m
codebook
* ok
export excel using "mort_for_extrap.xls", firstrow(variables) replace
* This data set was sent to Noel Johnson who then used GIS to create extrapolated mortality rates as described in the web appendix.

* The data must be recreated for this data set of 1802 cities in the 16 countries.
use finaljjk3, clear
keep if year == 1300
keep countryname city_id city_jjk longitude latitude
codebook 
gen mainsample = 1
* problems with longitude and latitude
save cities_data_needed.dta, replace
count
* 1802

* We privilege these extrapolated data sets. 
use "mort274.dta", clear
sort city_id
save extrap274, replace
codebook city_id
* 1801
use "mort177.dta", clear
sort city_id
save extrap177, replace
codebook city_id
* 1801

** We combine with the main data set **

use finaljjk3, clear
* We drop this city from the Azores. 
drop if city_jjk == "PONTA-DELGADA"
* Attach the two extrapolated series (274-city basis first, then 177-city basis).
foreach N in 274 177 {
	sort city_id
	merge city_id using extrap`N'
	tabulate _merge
	drop _merge
}
* For each basis: compare predicted and true mortality, keep the purely extrapolated
* series as mortalt`N', then overwrite mortality`N' with the observed rate where available.
foreach N in 177 274 {
	* Correlation predicted and true mortality
	codebook mortality`N' if year == 1850
	codebook mortality if year == 1850
	correlate mortality mortality`N' if year == 1300
	correlate mortality mortality`N' [w=pop1300] if year == 1300
	generate mortalt`N' = mortality`N'
	replace mortality`N' = mortality if mortality != .
}
label variable mortalt177 "Extrapolated mortality rate based on 177 raw mortality cities"
label variable mortalt274 "Extrapolated mortality rate based on 274 mortality cities"
label variable mortality177 "Mortality + extrapolated mortality rate based on 177 cities"
label variable mortality274 "Mortality + extrapolated mortality rate based on 274 cities"
correlate mortality mortalt274 mortalt177 if year == 1300
correlate mortality mortality274 mortality177 if year == 1300
save finaljjk4, replace

** Country-level extrapolations **

* List of cities in the full sample.
* (This rebuilds and overwrites cities_data_needed.dta from the 1300 cross-section of finaljjk3.)
use finaljjk3, clear
keep if year == 1300
keep countryname city_id city_jjk longitude latitude
codebook 
gen mainsample = 1
save cities_data_needed.dta, replace
count
* 1802
* For the countries that are not among the 16 countries, we use the average for the country instead.
use finaljjk, clear
keep if countryname == "Albania" | countryname == "Bulgaria" | countryname == "Finland" | countryname == "Greece" | countryname == "Hungary" | countryname == "Romania" | countryname == "Russia" | countryname == "Malta"  | countryname == "Yugoslavia" | countryname == "Poland" | countryname == "Slovakia"
* One row per city.
bysort countryname city_id city_jjk: keep if _n == 1
keep countryname city_id city_jjk longitude latitude
tab countryname
codebook 
save cities_eastern_europe, replace
count
* 478
* We add cities from the full sample. 
append using cities_data_needed.dta
sum city_id
* We add cities from the MENA (see text for details). 
append using cities_mena.dta
sum city_id
* Rows with an empty city_jjk take their name from the variable "city"
* (presumably the rows appended from cities_mena.dta -- TODO confirm).
replace city_jjk = city if city_jjk == ""
drop year city
* Only the rows coming from cities_data_needed.dta carry mainsample = 1; all others are set to 0.
replace mainsample = 0 if mainsample == .
save cities_data_all.dta, replace
* This data set helps for the market access calculations below. 

use finaljjk4, clear
tab year
* 1801 per year

*******************************
* PHYSICAL GEOGRAPHY CONTROLS *
*******************************

*** TEMPERATURES ***

* Source: Luterbacher et al. (2004); see the web appendix for details.
* The variable was created in GIS.

* City-level mean over the years up to 1600 (earliest century for which we have good data)
use Temperatures1500_1799_new, clear
drop if year > 1600
collapse (mean) avtemp15001600 = temp, by(city_id)
label variable avtemp15001600 "Mean summer temperatures (celsius degrees) in 1500-1600"
sort city_id
save temp15001600, replace

* Merge into the main data set; rows that only exist in the temperature file are discarded.
use finaljjk4, clear
sort city_id
merge city_id using temp15001600
tabulate _merge
tabulate city_jjk if _merge == 1 & sample == 1
drop if _merge == 2
drop _merge
save finaljjk4, replace
tabulate year

*** ELEVATION ***

* Source: Jarvis et al. (2008); see the web appendix for details.
* The variable was obtained in GIS.

* Keep the city identifier and its elevation only.
use "elevation_new.dta", clear
keep city_id elevation
label variable elevation "Altitude (m) of the city"
sort city_id
save elevation, replace

* Merge into the main data set.
use finaljjk4, clear
sort city_id
merge city_id using elevation
tabulate _merge
drop _merge
save finaljjk4, replace
tabulate year

*** CEREAL SUITABILITY ***

* See the web appendix for details on the source: Fischer et al. (2002).
* We created the variable in GIS. 

use "cerealsuit_new.dta", clear
ren CerealSuitNearest cerealclosest
ren cerealsuit25k cereal25k
ren cerealsuit50k cereal50k
ren cerealsuit100k cereal100k
* In the analysis, we use closest 
* (the 25k/50k/100k variables renamed above are dropped by this keep).
keep city_id cerealclosest 
* NOTE(review): the label says "within 10km" while the variable kept is the "nearest" measure -- confirm which is right.
label var cerealclosest "Average cereal suitability within 10km"
sort city_id
save cerealnew, replace
count

* We merge with the main data set. 
use finaljjk4, clear
sort city_id
merge city_id using cerealnew
tab _m 
drop _m
save finaljjk4, replace
tab year 

*** POTATO SUITABILITY ***

* See the web appendix for details on the source: Global Agro-Ecological Zones (GAEZ).
* We created the variable in GIS. 

use "potatosuit_new.dta", clear
ren potatoclosest potatolowclosest
ren potatosuit25k potatolow25k
ren potatosuit50k potatolow50k
ren potatosuit100k potatolow100k
* In the analysis, we use closest 
* (the 25k/50k/100k variables renamed above are dropped by this keep).
keep city_id potatolowclosest 
* NOTE(review): the label says "within 10km" while the variable kept is the "closest" measure -- confirm which is right.
label var potatolowclosest "Average potato suitability within 10km"
sort city_id
save potatosuitnew, replace
count

* We merge with the main data set. 
use finaljjk4, clear
sort city_id
merge city_id using potatosuitnew
tab _m 
drop _m
save finaljjk4, replace
tab year 

*** PASTORAL SUITABILITY ***

* See the web appendix for details on the source (TODO: the source reference is missing here).
* We created the variable in GIS. 

use pastoralweaknew, clear
ren pastoralweakdummy pastoral_weak_closest
ren pastoralweak25k pastoral_weak_25k
ren pastoralweak50k pastoral_weak_50k
ren pastoralweak100k pastoral_weak_100k
* In the analysis, we use closest 
* (the 25k/50k/100k variables renamed above are dropped by this keep).
keep city_id pastoral_weak_closest 
* NOTE(review): the source variable is named "pastoralweakdummy" while the label says "Average ... within 10km" -- confirm.
label var pastoral_weak_closest "Average pastoral suitability within 10km"
sort city_id
save pastoralsuitweak_new, replace

* We merge with the main data set. 
use finaljjk4, clear
sort city_id
merge city_id using pastoralsuitweak_new
tab _m 
drop _m
save finaljjk4, replace
* The first tabulation also reports missing years.
tab year, m
tab year 

*** DISTANCE TO THE COAST ***

* There are three seas/oceans, hence coastlines. 
* We created the variables in GIS. 
* Note that we created two versions of the distance to the coast data, based on different GIS files of coastlines. 
* The commented-out code in each sub-section is what produces dist2nb, dist2a and dist2m.
* Since it is not run here, those three files must already exist for the merge further below.

*** North-Baltic (NB) Sea ***
import delimited "dist2dotsNBrestricted.csv", clear
ren long_nb nb_lon
ren lat_nb nb_lat
drop f_code
save nb_points_temp, replace
* These are points (due to the high resolution of the coastline GIS file).
* We then obtain the minimal distance from each city to the coastline. 
* The commands below take time to run. We thus do not run them.  
*use finaljjk4, clear
*keep if year == 1300
*keep city_id longitude latitude
*ren longitude city_lon
*ren latitude city_lat
*cross using nb_points_temp
*geodist city_lat city_lon nb_lat nb_lon, gen(dist)
*collapse (min) dist, by(city_id)
*keep city_id dist
*ren dist dist2nb
*sort city_id
*save dist2nb, replace

*** Atlantic Ocean ***
import delimited "dist2dotsArestricted.csv", clear
ren long a_lon
ren lat a_lat
drop f_code
save a_points_temp, replace
* These are points (due to the high resolution of the coastline GIS file).
* We then obtain the minimal distance from each city to the coastline. 
* The commands below take time to run. We thus do not run them.  
*use finaljjk4, clear
*keep if year == 1300
*keep city_id longitude latitude
*ren longitude city_lon
*ren latitude city_lat
* 1,801
*cross using a_points_temp
*geodist city_lat city_lon a_lat a_lon, gen(dist)
*collapse (min) dist, by(city_id)
*keep city_id dist
*ren dist dist2a
*sort city_id
*save dist2a, replace

*** Mediterranean sea ***
import delimited "dist2dotsMrestricted.csv", clear
ren long m_lon
ren lat m_lat
drop f_code
save m_points_temp, replace
* These are points (due to the high resolution of the coastline GIS file).
* We then obtain the minimal distance from each city to the coastline. 
* The commands below take time to run. We thus do not run them.  
*use finaljjk4, clear
*keep if year == 1300
*keep city_id longitude latitude
*ren longitude city_lon
*ren latitude city_lat
* 1,801
*cross using m_points_temp
*geodist city_lat city_lon m_lat m_lon, gen(dist)
*collapse (min) dist, by(city_id)
*keep city_id dist
*ren dist dist2m
*sort city_id
*save dist2m, replace

*** Older files ***
* This is an older file of distances to coastlines that we created.
* We use this file too in case the coastlines slightly differ.
* It is only re-sorted on (countryname, city_jjk) here, the key of the merge below.
use dist2AMNBold, clear
sort countryname city_jjk
save dist2AMNBold, replace

*** We merge with the data ***

use finaljjk4, clear
* Attach the three city-to-coast distances in turn (North-Baltic, Atlantic, Mediterranean)
* and give each one its upper-case suffix (dist2nb -> dist2NB, and so on).
foreach S in nb a m {
	local U = upper("`S'")
	sort city_id
	merge city_id using dist2`S'
	tabulate _merge
	drop _merge
	rename dist2`S' dist2`U'
}
* Main coast variable: smallest of the three distances.
egen dist2AMNB = rmin(dist2A dist2M dist2NB)
label variable dist2AMNB "Euclidean distance (km) to any coastal city"
* Proximity dummies at 5, 10 and 50 km for each distance.
foreach X in dist2NB dist2A dist2M dist2AMNB {
	foreach K in 5 10 50 {
		generate `X'_`K' = (`X' <= `K')
		label variable `X'_`K' "Dummy if distance to the coast < `K' Km (for `X')"
	}
}
* Correction using the older distance file, matched on country and city name.
sort countryname city_jjk
merge countryname city_jjk using dist2AMNBold
tabulate _merge
tabulate city_jjk if _merge == 2
drop if _merge == 2
drop _merge
* The "new" distances copy the current ones; the older value replaces them only for
* cities with a mortality rate whose older distance is exactly 0.
foreach X in A M NB AMNB {
	generate dist2`X'new = dist2`X'
	replace dist2`X'new = dist2`X'old if mortality != . & dist2`X'old == 0
	label variable dist2`X'new "Distance to the `X' coast (km)"
}
* One small city is corrected by hand (obvious mistake based on Google Map).
foreach V in dist2Mnew dist2AMNBnew {
	replace `V' = 0 if city_jjk == "MARSILLARGUES"
}
* Proximity dummies for the corrected distances.
foreach X in dist2NB dist2A dist2M dist2AMNB {
	foreach K in 5 10 50 {
		generate `X'new_`K' = (`X'new <= `K')
		label variable `X'new_`K' "Dummy if distance to the coast < `K' Km (for `X'new)"
	}
}
save finaljjk4, replace
tabulate year

*** DISTANCE TO RIVERS ***

* See Web Appendix for details on the source. 
* We created the variable in GIS. 

* We load with ", clear" like every other "use" in this file: without it, Stata refuses
* to load the file (r(4)) whenever the data in memory has unsaved changes.
use "DRivers_new.dta", clear
sort city_id
save rivers, replace

* We merge with the main data set. 
use finaljjk4, clear
sort city_id
merge city_id using rivers
tab _m
drop _m
ren DRiver drivers
replace drivers = drivers/1000
* was in meters, now in Km
label var drivers "Distance to a river (km)"
* Proximity dummy; it is set to missing (not 0) when the distance itself is missing.
gen rivers_10 = (drivers <= 10)
replace rivers_10 = . if drivers == .
label var rivers_10 "Dummy if less than 10 km from a river"
save finaljjk4, replace
tab year, m

*******************************
* ECONOMIC GEOGRAPHY CONTROLS *
*******************************

*** LOG POP ***

* Log population in 1300, and population right after the Black Death obtained by
* applying the mortality rate (in percent) to the 1300 population.
use finaljjk4, clear
gen lp1300  =log(pop1300)
* pop1353 uses the observed mortality rate only; pop1353e uses mortality274, i.e. the
* observed rate completed with the extrapolated rate.
gen pop1353 = pop1300/100*(100-mortality)
gen pop1353e = pop1300/100*(100-mortality274)
gen lp1353  =log(pop1353)
label var lp1300 "Log of pop 1300"
label var pop1353 "Pop estimated in the aftermath of the Black Death"
* This label belongs to pop1353e; it previously overwrote the label of pop1353.
label var pop1353e "Pop estimated in the aftermath of the Black Death, incl. extrapolated mortality"
label var lp1353 "Log of pop 1353"
save finaljjk4, replace
tab year

*** MARKET ACCESS *** 

* We build the list of origin cities (ocity_list) and the list of destination cities
* (dcity_list). Both contain the city ids of the 1300 cross-section, stored under the
* name used as merge key for the travel cost file below.
foreach P in o d {
	use finaljjk4, clear
	keep if year == 1300
	keep city_id
	generate `P'city = city_id
	keep `P'city
	sort `P'city
	save `P'city_list, replace
}

* We then obtain the travel costs between these in GIS. 
* The Web Appendix provides details on the sources and modes of transportation/speeds used.
use "TravelCostsNew.dta", clear
count
codebook ocity dcity
* We keep only pairs whose origin is in our list of cities...
sort ocity 
merge ocity using ocity_list
tab _m
keep if _m == 3
drop _m
* ... and whose destination is in our list of cities.
sort dcity 
merge dcity using dcity_list
tab _m
keep if _m == 3
drop _m
count
sort ocity dcity
save travelcosts_1801x1801, replace

* We obtain the Euclidean distance too. 
* One row per city with its coordinates.
use finaljjk4, clear
bysort city_id: keep if _n == 1
keep city_id longitude latitude
save dist_cost, replace
* The file is crossed with itself to form every (destination, origin) pair: the copy in
* memory is renamed as the destination, the copy brought in by cross becomes the origin.
use dist_cost, clear
ren city_id dcity
ren longitude dlong
ren latitude dlat
cross using dist_cost
ren city_id ocity
ren longitude olong
ren latitude olat
* geodist is a user-written command (it must be installed).
geodist olat olong dlat dlong, gen(dist)
keep dcity ocity dist
ren dist travelcost_dist
ren ocity origin_city
ren dcity dest_city
sort origin_city dest_city
save travelcost_dist, replace

* We create the file that we use to create the market access variables. 
* One row per city (1300 cross-section) with the populations and mortality rates,
* keyed on dest_city so that it can be merged onto the destination side of each pair.
use finaljjk4, clear
keep if year == 1300
tab city_id if city_jjk == "MESSINA"
* 1449
keep city_id pop1300 pop1353 pop1353e pop1400 pop1600 pop1700 pop1750 mortality mortality274 
ren city_id dest_city
sort dest_city
save MA, replace
count

**** Overall MA with cost 1, cost 2, speed 1, speed 2 parameters *****

* Relying on extrapolated mortality (1353e) *
* We start from the origin-destination travel costs and attach the destination's populations.
use travelcosts_1801x1801, clear
ren ocity origin_city
ren dcity dest_city
sort dest_city
merge dest_city using MA
tab _m
drop _m
* Adding the travel cost based on Euclidean distance only.
sort origin_city dest_city
merge origin_city dest_city using travelcost_dist
tab _m
drop _m
* We drop the "own" city
* ("origin" and "dest" below are abbreviations of origin_city and dest_city).
drop if origin == dest
sort origin dest
order origin dest pop* mortality mortality274
* Contribution of each destination: its population divided by the travel cost raised to
* the exponent. The suffixes _38, _2 and _1 stand for the exponents 3.8, 2 and 1.
foreach X in cost1 cost2 speed1 speed2 dist {
	foreach Y in 1300 1353e 1400 1600 1700 1750 {
		gen ma`Y'_`X'_38 = pop`Y'/(travelcost_`X')^(3.8)
		gen ma`Y'_`X'_2 = pop`Y'/(travelcost_`X')^(2)
		gen ma`Y'_`X'_1 = pop`Y'/(travelcost_`X')^(1)
	}
}
* We obtain the sum of market access by origin city.
collapse (sum) ma*, by(origin)
* We obtain its log. 
foreach X of varlist ma*_*_* {
	gen l`X' = log(`X')
}
ren origin city_id
* Only the logs are kept.
drop ma*
foreach N in 1300 1353e 1400 1600 1700 1750 { 
	foreach X in cost1 cost2 speed1 speed2 dist {
		foreach D in 1 2 38 {
			* The label reports the exponent actually used: the suffix 38 means 3.8
			* (the label used to read "sigma = 38").
			local sig `D'
			if `D' == 38 local sig 3.8
			label var lma`N'_`X'_`D' "Log market access, `N', costs = `X', sigma = `sig'"
		}
	}
}
sort city_id
save ma_1801x1801, replace
count

** The log market access variables are added to the main data set **

use finaljjk4, clear
sort city_id
merge city_id using ma_1801x1801
tabulate _merge
drop _merge
save finaljjk4, replace
tabulate year

*****************************
*** ROADS AND LAND ROUTES ***
*****************************

* See the web appendix for details on the sources used. 
* We created the variable in GIS. 

*** Distance to Roman Roads ***

* This file has the distance measures.
use EconGeographynew, clear
ren Dmajorromanrd DMajorRomRoad 
ren Danyromanrd DAnyRomRoad
ren Dmajorromanrdintersect DMajRomIntersection
ren Danyromanrdintersect DAnyRmRdIntersect
keep city_id *Rom* *Rm*
* The distance to a major road intersection is taken from the other file below instead.
drop DMajRomIntersection
sort city_id
save romannew, replace

* This file has the intersections. 
use "FixedIntersections.dta", clear
keep city_id Dmajorromanrdintersect
ren Dmajorromanrdintersect DMajRomIntersection
sort city_id
save romannew2, replace

* We merge with the main data set. 
use finaljjk4, clear
sort city_id
merge city_id using romannew
tab _m
drop _m
sort city_id
merge city_id using romannew2
tab _m
drop _m
* NOTE(review): in the dummies below a missing distance yields 0, not missing, because
* Stata treats missing values as larger than any number.
* We express in km. 
replace DMajorRomRoad = DMajorRomRoad/1000
gen DMajorRomRoad_10 = (DMajorRomRoad <= 10)
label var DMajorRomRoad_10 "Dummy if less than 10 km from a major Roman road"
* We express in km. 
replace DAnyRomRoad = DAnyRomRoad/1000
gen DAnyRomRoad_10 = (DAnyRomRoad <= 10)
label var DAnyRomRoad_10 "Dummy if less than 10 km from any Roman road"
* We express in km. 
replace DMajRomIntersection = DMajRomIntersection/1000
gen DMajRomIntersection_10 = (DMajRomIntersection <= 10)
label var DMajRomIntersection_10 "Dummy if less than 10 km from a major Roman road intersection"
* We express in km. 
* "DAnyRmRdInter" is presumably an abbreviation of DAnyRmRdIntersect (relies on variable abbreviation).
replace DAnyRmRdInter = DAnyRmRdInter/1000
gen DAnyRmRdIntersection_10 = (DAnyRmRdIntersect <= 10)
label var DAnyRmRdIntersection_10 "Dummy if less than 10 km from any Roman road intersection"
save finaljjk4, replace
tab year, m

*** Distance to Land Routes ***

* This file has the distance measures.
use EconGeographynew, clear
* Descriptive checks only.
corr *
corr Dmajorromanrdintersect Dmedrdintersect 
keep city_id Dmedrd Dmedrdintersect
ren Dmedrd dist2landroute
ren Dmedrdintersect dist2landrouteint
* The distance to an intersection is taken from the other file below instead.
drop dist2landrouteint
sort city_id
save landroutes, replace

* This file has the intersections. 
use "FixedIntersections.dta", clear
keep city_id Dmedievalrdintersect
ren Dmedievalrdintersect dist2landrouteint
sort city_id
save landroutes2, replace

* We merge with the main data set. 
use finaljjk4, clear
sort city_id
merge city_id using landroutes
tab _m
drop _m
sort city_id
merge city_id using landroutes2
tab _m
drop _m
* NOTE(review): in the dummies below a missing distance yields 0, not missing, because
* Stata treats missing values as larger than any number.
* We express in km. 
replace dist2landroute = dist2landroute/1000
label var dist2landroute "Distance to a medieval land route"
gen dist2landroute_10 = (dist2landroute <= 10)
label var dist2landroute_10 "Dummy if distance to a medieval land route <= 10 km"
* We express in km. 
replace dist2landrouteint = dist2landrouteint/1000
label var dist2landrouteint "Distance to a medieval land route intersection"
gen dist2landrouteint_10 = (dist2landrouteint <= 10)
label var dist2landrouteint_10 "Dummy if distance to a medieval land route intersection <= 10 km"
save finaljjk4, replace
tab year, m

********************
*** HANSA CITIES ***
********************

* The sources used are detailed in the web appendix.
* The variable was created in GIS.

* First file: information from Dollinger and Bairoch.
import excel "HansaDollingerBairoch.xlsx", sheet("Sheet1") firstrow clear
keep city_id Hansa ChiefTown Important
rename Hansa hansa_all
rename ChiefTown chieftown
rename Important important
* A major Hansa town is either a chief town or an important town.
generate hansa_major = inlist(1, chieftown, important)
label variable hansa_all "Dummy if any Hansa town"
label variable chieftown "Dummy if chief Hansa town"
label variable important "Dummy if important Hansa town"
label variable hansa_major "Dummy if major Hansa town (chief + important)"
sort city_id
save hansa_new, replace

* Second file: information from updated sources.
* Both files are used and their information is combined below.
import excel "non_arc_variables_mark_new2.xls", sheet("Feuil1") firstrow clear
keep if city_id != .
keep city_id HansaFixed
rename HansaFixed HansaFixednew
sort city_id
save hansa_new_new, replace

* Merge of the first file into the main data set.
use finaljjk4, clear
sort city_id
merge city_id using hansa_new
drop if _merge == 2
tabulate _merge
tabulate city_jjk if _merge == 1 & sample == 1
drop _merge
* Cities without Hansa information are coded 0.
foreach V in hansa_all chieftown important hansa_major {
	replace `V' = 0 if `V' == .
}
tabulate year, missing
save finaljjk4, replace
count

* Merge of hansa_new2 and then of the updated file, dropping rows found only in those files.
use finaljjk4, clear
foreach F in hansa_new2 hansa_new_new {
	sort city_id
	merge city_id using `F'
	tabulate _merge
	tabulate city_jjk if _merge == 1 & sample == 1
	drop if _merge == 2
	drop _merge
}
* Final Hansa variable: the updated value wins whenever it is available;
* cities still without a value are coded 0.
replace HansaFixed = HansaFixednew if HansaFixednew != .
codebook HansaFixed
tabulate city_jjk if HansaFixed == .
replace HansaFixed = 0 if HansaFixed == .
codebook HansaFixed
label variable HansaFixed "Dummy if any Hansa town - fixed"
drop HansaFixednew
tabulate year, missing
save finaljjk4, replace
count

*************
*** FAIRS ***
*************

* The sources used are detailed in the web appendix.
* The variable was created in GIS.

* First file: the original fair variable.
use econgeo_all, clear
keep city_id Fair
rename Fair fair
label variable fair "Dummy if market fair in the city (source: Medieval Atlas)"
sort city_id
save fair, replace

* Two further files with updated information; both are used.
import excel "NewFairData.xlsx", sheet("BairochBoskerIDsMark1.csv") firstrow clear
keep city_id marketfair
label variable marketfair "Dummy if market fair in the city"
sort city_id
save marketfair, replace
import excel "non_arc_variables_mark_new2.xls", sheet("Feuil1") firstrow clear
keep if city_id != .
keep city_id Market_Fair
sort city_id
save fairnew, replace

* The three files are merged into the main data set in turn; rows found only in a
* fair file are dropped.
use finaljjk4, clear
foreach F in marketfair fair fairnew {
	sort city_id
	merge city_id using `F'
	tabulate _merge
	drop if _merge == 2
	drop _merge
}
codebook fair
* Main variable: 1 when any of the three sources reports a fair, 0 otherwise.
tabulate fair marketfair if year == 1400
generate fair_all = inlist(1, fair, marketfair, Market_Fair)
codebook fair_all
drop fair marketfair Market_Fair
* Only the combined variable is kept.
label variable fair_all "Dummy if market fair in the city (sources: all)"
codebook fair_all
tabulate year, missing
save finaljjk4, replace

*****************
*** AQUEDUCTS ***
*****************

* See the web appendix for details on the sources used. 
* We created the variable in GIS. 

* This file has the aqueduct variable.
use EconGeographynew, clear
keep city_id Daqueduct
ren Daqueduct DAqueduct
sort city_id
save Daqueductnew, replace

* We merge with the main data set. 
use finaljjk4, clear
sort city_id
merge city_id using Daqueductnew
tab _m
drop _m
* We express in km
replace DAqueduct = DAqueduct/1000
label var DAqueduct "Distance to an aqueduct"
* NOTE(review): a missing distance yields 0 here, not missing, because Stata treats
* missing values as larger than any number.
gen aqueduct_10 = (DAqueduct <= 10)
label var aqueduct_10 "Dummy if aqueduct <= 10 km"
save finaljjk4, replace
tab year, m

********************
*** UNIVERSITIES ***
********************

* See the web appendix for details on the sources used. 
* We created the variable in GIS. 

* This file has the university variable.
use econgeo_all, clear
keep city_id University
ren University university
sort city_id
save university, replace

* This file has updated information. 
import excel "non_arc_variables_mark_new2.xls", sheet("Feuil1") firstrow clear
drop if city_id == .
keep city_id University
ren University universitynew
sort city_id
save university_new, replace

* We merge with the main data set. 
use finaljjk4, clear
sort city_id
merge city_id using university
tab city_jjk if _m == 1 & sample == 1
tab _m
drop if _m == 2
drop _m
* Cities without information in the first file are coded 0.
replace university = 0 if university == .
sort city_id
merge city_id using university_new
tab _m
drop if _m == 2
drop _m
* We create our main variable 
* The updated value replaces the original one whenever it is available.
replace university = universitynew if universitynew != .
drop universitynew
codebook university
label var university "Dummy if university before 14th century"
tab year, m
save finaljjk4, replace

**********************
*** CAPITAL CITIES ***
**********************

* The sources used are detailed in the web appendix.
* The variable was created in GIS.

* Controls from the Bosker data set (1300 cross-section).
* Two variables are kept.
use controls, clear
keep if year == 1300
keep city_id Bcapital Bcommune
sort city_id
save boskercontrols, replace

* New information used to update the capital dummy.
import excel "non_arc_variables_mark_new2.xls", sheet("Feuil1") firstrow clear
keep if city_id != .
keep city_id StateCapital
rename StateCapital StateCapitalnew
sort city_id
save statecap_new_new, replace

* Merge of the Bosker controls into the main data set.
use finaljjk4, clear
sort city_id
merge city_id using boskercontrols
tabulate _merge
tabulate city_jjk if _merge == 1 & sample == 1
drop if _merge == 2
drop _merge
* Cities with no value in the Bosker data set are probably not capital cities
* (nor communes), so both dummies are set to 0 for them.
foreach V in Bcapital Bcommune {
	replace `V' = 0 if `V' == .
}
* The capital dummy is then overwritten by the new information where available.
sort city_id
merge city_id using statecap_new_new
tabulate _merge
drop if _merge == 2
drop _merge
replace Bcapital = StateCapitalnew if StateCapitalnew != .
codebook Bcapital
drop StateCapitalnew
tabulate year, missing
label variable Bcapital "Capital city dummy"
label variable Bcommune "Commune dummy"
save finaljjk4, replace

*************************************************
*** MONARCHY AND OTHER POLITICAL INSTITUTIONS ***
*************************************************

* See the web appendix for details on the sources used. 

*** Political boundaries in 1300, large state, and parliamentary activity ***

* We used various sources to create this file. 
clear
import excel "City_Sov_State-Parliament.xlsx", sheet("City_Sov_State.txt") firstrow
drop if city_id == .
ren short_name entity1300
* We keep the variables we need. 
* Several names here are variable abbreviations: "entity" stands for entity1300 and
* "Parliament" for Parliament1300 (renamed further below).
keep city_id entity republican monarchy Parliament
* Entity c. 1300. 
tab entity1300
* The accented names are mis-encoded in the imported file; we replace them with plain
* ASCII names. The strings on the right must match the imported text exactly.
replace entity1300 = "Aragon" if entity1300 == "Arag√≥n"
replace entity1300 = "Brunswick-Luneburg" if entity1300 == "Brunswick-L√ºneburg"
replace entity1300 = "Breifne" if entity1300 == "Br√©ifne"
replace entity1300 = "Liege" if entity1300 == "Li√®ge"
replace entity1300 = "Tir Eogain" if entity1300 == "T√≠r E√≥gain"
replace entity1300 = "Waldstatte" if entity1300 == "Waldst√§tte"
replace entity1300 = "Genoa" if city_id == 1573
* entity1300_old keeps "Small States" as one entity; entity1300 gives each of these
* cities its own entity (its city id as a string).
gen entity1300_old = entity1300
replace entity1300 = string(city_id) if entity1300 == "Small States"
* Small states: These are city states that belong to the Holy Roman Empire
* We could either consider as one entity, i.e. a federation, or independent cities.
label var entity1300 "Political Entity of the City in 1300"
label var entity1300_old "Political Entity of the City in 1300 (small states = HRE)"
* The other variables are described below.
label var monarchy "The city belongs to a monarchy in 1300"
label var republican "The city is a republic"
gen federation1300 = (monarchy == 0 & republican == 0)
label var federation1300 "The city does not belong to a monarchy and is not a republic"
ren Parliament1300 parliament1300
* Missing parliamentary activity is coded 0.
replace parliament1300 = 0 if parliament1300 == .
gen parliament1300_yn = (parliament1300 > 0 & parliament1300 != .)
label var parliament1300 "Parliamentary activity (number of sessions) in 1300-1400"
label var parliament1300_yn "Parliamentary activity: yes (number of sessions > 0) in 1300-1400"
order city_id entity1300 entity1300_old monarchy republican federation parliament*
sort city_id
save polboundaries1300, replace

* Entity c. 1300
* We update some entity names.  
import excel "non_arc_variables_mark_new2.xls", sheet("Feuil1") firstrow clear
drop if city_id == .
ren FE_name entity1300
tab entity1300
* Long names are shortened.
replace entity1300 = "Savoy" if entity1300 == "County of Savoy"
replace entity1300 = "Schwerin" if entity1300 == "County of Schwerin"
replace entity1300 = "Aragon" if entity1300 == "Crown of Aragon"
replace entity1300 = "Castile" if entity1300 == "Kingdom of Castile"
replace entity1300 = "France" if entity1300 == "Kingdom of France"
replace entity1300 = "Norway" if entity1300 == "Kingdom of Norway"
replace entity1300 = "Sicily-Naples" if entity1300 == "Kingdom of Sicily in Naples"
replace entity1300 = "England" if entity1300 == "kingdom of England"
replace entity1300 = "Small States" if entity1300 == "Small States of the Holy Roman Empire"
* As above: the "_old" version keeps "Small States" as one entity, the other version
* gives each of these cities its own entity (its city id as a string).
gen entity1300_old = entity1300
replace entity1300 = string(city_id) if entity1300 == "Small States"
keep city_id entity1300 entity1300_old
* The "_new" suffix avoids a name clash with the variables already in the main data set.
ren entity1300 entity1300_new
ren entity1300_old entity1300_old_new
sort city_id
save entity1300_new, replace

* Monarchy and republican
* We use additional information. 
* The variables are renamed with a "new" suffix so that they can be merged next to the existing ones.
import excel "non_arc_variables_mark_new2.xls", sheet("Feuil1") firstrow clear
drop if city_id == .
ren Monarchy Monarchynew
ren republican republicannew
keep city_id Monarchynew republicannew
sort city_id
save monarepu_new, replace

* Parliament
* We use additional information. 
import excel "non_arc_variables_mark_new2.xls", sheet("Feuil1") firstrow clear
drop if city_id == .
ren Parliament1300 parliamentnew
ren Parliamentary parliamentarynew
keep city_id parliamentnew parliamentarynew
sort city_id
save parlia_new, replace

* We combine with the main data set. 
use finaljjk4, clear
tab year
tab countryname
sort city_id
merge city_id using polboundaries1300
tab _m
drop if _m == 2
drop _m 
tab year
tab countryname
* Updated entity names: the new name replaces the original one when it is not empty.
* NOTE(review): unlike the merge above, this merge and the two below do not drop rows
* found only in the using file (_m == 2) -- confirm that there are none.
sort city_id
merge city_id using entity1300_new
tab _m
drop _m 
replace entity1300 = entity1300_new if entity1300_new != ""
replace entity1300_old = entity1300_old_new if entity1300_old_new != ""
codebook entity1300
codebook entity1300_old
drop entity1300_new entity1300_old_new
* Updated monarchy and republic dummies: the new value wins when it is not missing.
sort city_id
merge city_id using monarepu_new
tab _m
drop _m 
replace monarchy = Monarchynew if Monarchynew != .
codebook monarchy
drop Monarchynew
replace republican1300 = republicannew if republicannew != .
codebook republican1300
drop republicannew
* Updated parliament variables: the new value wins when it is not missing.
sort city_id
merge city_id using parlia_new
tab _m
drop _m 
replace parliament1300 = parliamentnew if parliamentnew != .
codebook parliament1300
drop parliamentnew
replace parliament1300_yn = parliamentarynew if parliamentarynew != .
codebook parliament1300_yn
drop parliamentarynew
sum parlia*
tab year, m
save finaljjk4, replace

*** Entity ***

* We modify the entity variable with additional information
use finaljjk4, clear
sort city_id 
merge city_id using entity1300_Mark_extra
tab _m
drop _m
replace entity1300 = "Sweden" if city_jjk == "FALUN" | city_jjk == "GAEVLE"
* The extra information only fills entities that are still empty.
replace entity1300_old = entity1300extra if entity1300_old == ""
replace entity1300 = entity1300extra if entity1300 == ""
* Small states are again given their own entity (the city id as a string).
replace entity1300 = string(city_id) if entity1300_old == "Small States"
drop entity1300extra
save finaljjk4, replace

*** Distance to parliament ***

* We recreated this variable in GIS. 
use DParliament, clear
* Divided by 1,000 so that the distance is in km (see the label below).
replace DParliament = DParliament/1000
sum DParliament
label var DParliament "Distance to nearest parliament (km)"
sort city_id
save distparlia_new, replace

* We merge with the main data set. 
use finaljjk4, clear
sort city_id
merge city_id using distparlia_new
tab _m
drop if _m == 2
drop _m 
sum DParliament, d
* Note: the log is missing for any city whose distance is 0.
gen lDParliament = log(DParliament)
* This label replaces the one attached to DParliament in the file merged above.
label var DParliament "Euclidean distance to a Parliament"
label var lDParliament "Log Euclidean distance to a Parliament"
tab year, m
save finaljjk4, replace

*** Autonomous Cities ***

* Data from Stasavage
clear
import excel "autonomous.xlsx", sheet("City_Sov_State.txt") firstrow clear
keep city_id Auton
ren Auton autonomous1300
* Missing values are coded 0.
replace autonomous1300 = 0 if autonomous1300 == .
label var autonomous1300 "Degree of autonomy of the city in 1300-1400 (e.g., 0.49 => 49% of the century)"
gen autonomous1300_yn = (autonomous1300 > 0 & autonomous1300 != .)
label var autonomous1300_yn "Autonomous city at one point in 1300-1400"
sort city_id
save autonomous, replace

* We update the information with some additional information.
import excel "non_arc_variables_mark_new2.xls", sheet("Feuil1") firstrow clear
drop if city_id == .
ren Autonomous Autonomousnew
keep city_id Autonomousnew
sort city_id
save auton_new, replace

* We merge with the main data set. 
use finaljjk4, clear
sort city_id
merge city_id using autonomous
tab _m
tab city_jjk if _m == 1 & sample == 1
drop if _m == 2
drop _m
sort city_id
merge city_id using auton_new
tab _m
drop if _m == 2
drop _m
* The updated value replaces the dummy whenever it is available.
replace autonomous1300_yn = Autonomousnew if Autonomousnew != .
drop Autonomousnew
* This data is missing for the Northern European cities
* We create our main variable. 
* "republican" and (in the label below) "representative" are presumably abbreviations of
* republican1300 and representative1300 -- relies on variable abbreviation.
* A missing autonomous1300_yn counts as true in the expression below, which is why the
* next line resets the variable to missing whenever one of the two inputs is missing.
gen representative1300 = (republican == 1 | autonomous1300_yn)
replace representative1300 = . if republican == . | autonomous1300_yn == .
label var representative "Dummy if city had some autonomy at one point (either republic or local autonomy)"
tab year, m
save finaljjk4, replace

****************************
*** DISTANCE TO A BATTLE ***
****************************

* See the web appendix for details on the sources used.
* This is the full battles data set we collected.
* We keep the battle id, year, coordinates and infantry numbers.
clear
import excel "battles_remi_v2.xls", sheet("Sheet1") firstrow
rename Year year
rename number infantry_num
rename Lattiude bat_lat
rename Longitude bat_lon
keep bat_lon bat_lat battle_num year infantry_num
save battles_full, replace

** 1400, 1600 **

* For each end year X (1400, 1600) we count the battles fought between 1353
* and X within 50 km and 100 km of every city of the mortality sample.
* The variable labels now state the 1353-X window that is actually used
* (they previously said 1300-X although the filter starts in 1353).
foreach X in 1400 1600 {
    * Battles of the window 1353-X
    use battles_full, clear
    keep if year >= 1353 & year <= `X'
    keep bat_lat bat_lon infantry_num
    save battles_1350`X'_temp, replace

    * Cities of the mortality sample observed in year X
    use finaljjk4, clear
    keep if samplemort == 1 & mortality != . & year == `X'
    count
    * 165
    keep city_id longitude latitude
    rename longitude city_lon
    rename latitude city_lat
    * 1,801

    * All city x battle pairs and their distance
    cross using battles_1350`X'_temp
    geodist city_lat city_lon bat_lat bat_lon, gen(dist)
    keep city_id dist infantry_num
    foreach Z in 50 100 {
        generate battle`Z'_`X' = (dist <= `Z' & dist != .)
        * The infantry variables are not kept by the collapse below.
        generate infyn`Z'_`X' = (dist <= `Z' & dist != . & infantry_num != .)
        generate infnum`Z'_`X' = infantry_num if dist <= `Z' & dist != .
    }

    * One row per city: number of battles, then a dummy for at least one
    collapse (sum) battle*, by(city_id)
    foreach Y of varlist battle* {
        generate `Y'_yn = (`Y' >= 1 & `Y' != .)
        rename `Y' `Y'_sum
    }
    label var battle50_`X'_yn "Dummy if battles 50km in 1353-`X'"
    label var battle50_`X'_sum "Number of battles 50km in 1353-`X'"
    label var battle100_`X'_yn "Dummy if battles 100km in 1353-`X'"
    label var battle100_`X'_sum "Number of battles 100km in 1353-`X'"
    sort city_id
    save battle_ctrls_`X', replace
    summarize *yn *sum
}

** 1347, 1352 **

* Number of battles within 50 km and 100 km of each city of the mortality
* sample, for the windows 1300-1347 and 1300-1352.
foreach X in 1347 1352 {
    * Battles of the window 1300-X
    use battles_full, clear
    keep if year >= 1300 & year <= `X'
    keep bat_lat bat_lon infantry_num
    save battles_1300`X'_temp, replace

    * Cities of the mortality sample (rows of year 1400)
    use finaljjk4, clear
    keep if samplemort == 1 & mortality != . & year == 1400
    count
    * 165
    keep city_id longitude latitude
    rename longitude city_lon
    rename latitude city_lat
    * 1,801

    * All city x battle pairs and their distance
    cross using battles_1300`X'_temp
    geodist city_lat city_lon bat_lat bat_lon, gen(dist)
    keep city_id dist infantry_num
    foreach Z in 50 100 {
        generate battle`Z'_`X' = (dist <= `Z' & dist != .)
        generate infyn`Z'_`X' = (dist <= `Z' & dist != . & infantry_num != .)
        generate infnum`Z'_`X' = infantry_num if dist <= `Z' & dist != .
    }

    * Sum of battles per city, then a dummy for at least one battle
    collapse (sum) battle*, by(city_id)
    foreach Y of varlist battle* {
        generate `Y'_yn = (`Y' >= 1 & `Y' != .)
        rename `Y' `Y'_sum
    }
    foreach Z in 50 100 {
        label var battle`Z'_`X'_yn "Dummy if battles `Z'km in 1300-`X'"
        label var battle`Z'_`X'_sum "Number of battles `Z'km in 1300-`X'"
    }
    sort city_id
    save battle_ctrls_1300`X', replace
    summarize *yn *sum
}

** Distance to a battle **

* Distance from every city (rows of year 1300) to the nearest battle of the
* windows 1300-1347, 1300-1352 and 1300-1350.
* The three windows share one loop and one label format; the 1347 and 1352
* labels previously lacked the hyphen and the unit used for 1300-1350.
foreach X in 1347 1352 1350 {
    * Battles of the window 1300-X (coordinates only)
    use battles_full, clear
    keep if year >= 1300 & year <= `X'
    keep bat_lat bat_lon
    save battles_1300`X'_temp, replace

    use finaljjk4, clear
    keep if year == 1300
    keep city_id longitude latitude
    rename longitude city_lon
    rename latitude city_lat
    * 1,801

    * All city x battle pairs; we keep the smallest distance per city
    cross using battles_1300`X'_temp
    geodist city_lat city_lon bat_lat bat_lon, gen(dist)
    collapse (min) dist, by(city_id)
    keep city_id dist
    rename dist dist2bat1300`X'
    label var dist2bat1300`X' "Euclidean distance (km) to a battle 1300-`X'"
    sort city_id
    save dist2bat1300`X', replace
}

*** HUNDRED YEAR WAR - 100 YEAR WAR ***

* Battles of the Hundred Years War near each city of the mortality sample.
* X = 1352: full period 1337-1352 (also 1337-1353, the file has no 1353).
* X = 1347: period 1337-1347 only.
foreach X in 1352 1347 {
    import excel "battles 1335 1355 mark.xls", sheet("Sheet1") firstrow clear
    tabulate Year
    * not 1353
    keep if hundredyrwar == 1
    count
    tabulate Year
    * The year restriction is only applied for the 1337-1347 window.
    if `X' == 1347 {
        keep if Year <= 1347
    }
    destring TotalNumbers, replace
    rename TotalNumbers infantry_num
    rename Longitude bat_lon
    rename Lattiude bat_lat
    keep bat_lat bat_lon infantry_num
    save battles_100yrwar_`X'_temp, replace

    * Distance
    use finaljjk4, clear
    keep if samplemort == 1 & mortality != . & year == 1300
    count
    * 165
    keep city_id longitude latitude
    rename longitude city_lon
    rename latitude city_lat
    * 1,801
    cross using battles_100yrwar_`X'_temp
    geodist city_lat city_lon bat_lat bat_lon, gen(dist)
    keep city_id dist infantry_num
    foreach Z in 50 100 {
        generate battle`Z'_100yrwar_`X' = (dist <= `Z' & dist != .)
        generate infyn`Z'_100yrwar_`X' = (dist <= `Z' & dist != . & infantry_num != .)
        generate infnum`Z'_100yrwar_`X' = infantry_num if dist <= `Z' & dist != .
    }

    * Sum of battles per city, then a dummy for at least one battle
    collapse (sum) battle*, by(city_id)
    foreach Y of varlist battle* {
        generate `Y'_yn = (`Y' >= 1 & `Y' != .)
        rename `Y' `Y'_sum
    }
    foreach Z in 50 100 {
        label var battle`Z'_100yrwar_`X'_sum "Number of battles `X', `Z'km"
        label var battle`Z'_100yrwar_`X'_yn "Dummy if battles `X', `Z'km"
    }
    sort city_id
    save hundredyrwar`X'_ctrls, replace
    summarize *yn *sum
}

** We combine with the main file **

use finaljjk4, clear

* Battle counts and dummies (rows found only in the using file are kept).
foreach F in battle_ctrls_1400 battle_ctrls_1600 battle_ctrls_13001347 battle_ctrls_13001352 {
    sort city_id
    merge city_id using `F', update
    tabulate _merge
    tabulate city_jjk if _merge == 1
    drop _merge
}
describe battle*sum, full

* Distance to the nearest battle (rows found only in the using file are dropped).
foreach F in dist2bat13001347 dist2bat13001352 dist2bat13001350 {
    sort city_id
    merge city_id using `F', update
    tabulate _merge
    tabulate city_jjk if _merge == 1
    drop if _merge == 2
    drop _merge
}

* Hundred Years War controls.
foreach F in hundredyrwar1352_ctrls hundredyrwar1347_ctrls {
    sort city_id
    merge city_id using `F', update
    tabulate _merge
    tabulate city_jjk if _merge == 1
    drop _merge
}

* We create additional variables
* NOTE(review): a missing distance yields 0 in the dummies below, because
* missing compares as larger than any number - confirm this is intended.
foreach P in 13001347 13001352 {
    local a = substr("`P'", 1, 4)
    local b = substr("`P'", 5, 4)
    generate dist2bat`P'_50 = (dist2bat`P' <= 50)
    label var dist2bat`P'_50 "Dummy if distance to a battle `a'-`b' below 50km"
    generate dist2bat`P'_100 = (dist2bat`P' <= 100)
    label var dist2bat`P'_100 "Dummy if distance to a battle `a'-`b' below 100km"
}
tabulate battle100_1347_sum
tabulate battle50_1347_sum
generate lbattle100_1347_sum = log(battle100_1347_sum+1)
label var lbattle100_1347_sum "Log number of battles 1300-1347 below 100km"
generate lbattle50_1347_sum = log(battle50_1347_sum+1)
label var lbattle50_1347_sum "Log number of battles 1300-1347 below 50km"
generate dist2bat13001350_100 = (dist2bat13001350 <= 100)
label var dist2bat13001350_100 "Dummy if distance to a battle 1300-1350 below 100km"
save finaljjk4, replace
tabulate year

******************
*** BISHOPRICS ***
******************

* We first use the data from Bosker (values of year 1300).
use controls, clear
keep if year == 1300
keep city_id Bbishop Barchbishop
sort city_id
save bishopbosker, replace

* In addition, we collected data on bishoprics and archbishoprics that were missing in Bosker's data set.
clear
import excel "Missing Bishoprics.xlsx", sheet("Missing Bishoprics.csv") firstrow
keep city_id DateofFounding Archbishopric Bishoprics
rename Archbishopric archbishopric
rename Bishoprics bishopric
* We just use the ones existing in 1317 (i.e., round 1300)
* NOTE(review): a missing founding date also satisfies the condition below
* and is therefore set to 0 - confirm this is intended.
foreach V in archbishopric bishopric {
    replace `V' = 0 if DateofFounding > 1317
}
drop DateofFounding
label var archbishopric "Dummy if archbishopric (Mark)"
label var bishopric "Dummy if bishopric (Mark)"
sort city_id
save bishoprics, replace

* These are additional observations we collected separately.
import excel "non_arc_variables_mark_new2.xls", sheet("Feuil1") firstrow clear
drop if city_id == .
keep city_id *ishopric*
rename Bishoprics Bishopricsnew
rename ArchBishopric ArchBishopricnew
sort city_id
save bishop_new_new, replace

* We combine with the main data set *
use finaljjk4, clear
sort city_id
merge city_id using bishopbosker
tabulate _merge
tabulate city_jjk if _merge == 1 & sample == 1
drop if _merge == 2
drop _merge
sort city_id
merge city_id using bishoprics
tabulate _merge
tabulate city_jjk if _merge == 1 & sample == 1
drop if _merge == 2
drop _merge
* We create the main variables.
* Cities absent from our own file are coded 0 there.
replace archbishopric = 0 if archbishopric == .
replace bishopric = 0 if bishopric == .
* Bosker's value is used when available, otherwise our own.
generate bishopric2 = cond(Bbishop == ., bishopric, Bbishop)
generate archbishopric2 = cond(Barchbishop == ., archbishopric, Barchbishop)
sort city_id
merge city_id using bishop_new_new
tabulate _merge
drop if _merge == 2
drop _merge
* A non-missing additional value overrides both sources.
replace archbishopric2 = ArchBishopricnew if ArchBishopricnew != .
replace bishopric2 = Bishopricsnew if Bishopricsnew != .
codebook archbishopric2 bishopric2
label var archbishopric2 "Dummy if archbishopric (Bosker+Mark)"
label var bishopric2 "Dummy if bishopric (Bosker+Mark)"
* The range below relies on the variable order created by the merges above.
drop Bbishop-bishopric Bishopricsnew ArchBishopricnew
tabulate year, missing
save finaljjk4, replace
* If we decide to use these variables, we should use archbishopric2 and bishopric2.

**********************************
* DISTANCE TO MESSINA FOR THE IV *
**********************************

* Coordinates of Messina (all its rows are kept; there is no year filter).
use finaljjk4, clear
keep if city_jjk == "MESSINA"
keep longitude latitude
rename longitude messina_lon
rename latitude messina_lat
save messina_euclidean, replace

* Distance from each city to Messina; the collapse leaves one row per city.
use finaljjk4, clear
keep countryname city_jjk longitude latitude
rename longitude city_lon
rename latitude city_lat
cross using messina_euclidean
geodist city_lat city_lon messina_lat messina_lon, gen(dist)
collapse (min) dist, by(countryname city_jjk)
rename dist messina_euclidean
sort countryname city_jjk
* The coordinates file is overwritten by the distance file of the same name.
save messina_euclidean, replace

* We merge with the main data set.
use finaljjk4, clear
sort countryname city_jjk
merge countryname city_jjk using messina_euclidean
tabulate _merge
drop if _merge == 2
drop _merge
codebook messina_euclidean
generate lmessina_euclidean = log(messina_euclidean)
label var messina_euclidean "Euclidean distance to Messina"
label var lmessina_euclidean "Log Euclidean distance to Messina"
save finaljjk4, replace
count

**************************************
* DISTANCE TO EUROPEAN + MENA CITIES *
**************************************

* This is the MENA data set from the Bosker paper.
* The file is loaded and saved back without any change.
use boskercities1300, clear
save boskercities1300, replace

* We create a super-list of our cities >= 1K in 1300 + the Bosker cities.
* NOTE(review): this step reads finaljjk rather than finaljjk4 - confirm
* that the earlier file is the intended source.
use finaljjk, clear
keep if year == 1300
keep if pop_bairoch >= 1 & pop_bairoch != .
rename pop_bairoch citypop_le10
rename lat lat_bosker
rename long long_bosker
count
* 522
drop if city_jjk == "MESSINA"
append using boskercities1300
* Appended rows have no city or country name yet; we fill them in.
replace city_jjk = city if city_jjk == ""
replace countryname = country if countryname == ""
rename city_jjk citydest
rename countryname countrydest
keep citypop_le10 long_bosker lat_bosker city_id citydest countrydest
rename city_id dest
save euroboskercities1300_1, replace
count
* 570

* We obtain the average distance to the destinations, weighted by the log
* of their population and excluding the city itself.
use finaljjk4, clear
keep if year == 1300
keep countryname city_jjk city_id longitude latitude
rename longitude city_lon
rename latitude city_lat
cross using euroboskercities1300_1
drop if city_id == dest
geodist city_lat city_lon lat_bosker long_bosker, gen(dist)
generate lcitypop_le10 = log(citypop_le10)
collapse (mean) dist [w=lcitypop_le10], by(countryname city_jjk city_id)
rename dist euromena1_wl_euclidean
generate leuromena1_wl_euclidean = log(euromena1_wl_euclidean)
label var euromena1_wl_euclidean "Average Euclidean distance (km) to all Europe + Mena cities"
label var leuromena1_wl_euclidean "Log average Euclidean distance to all Europe + Mena cities"
sort countryname city_jjk
save euromena1_wl_euclidean, replace

* We merge with the main data set.
use finaljjk4, clear
sort countryname city_jjk
merge countryname city_jjk using euromena1_wl_euclidean, update
tabulate _merge
drop if _merge == 2
drop _merge
save finaljjk4, replace
count

**************************
*** OTHER IV VARIABLES ***
**************************

use finaljjk4, clear

* We use the start month + 3.5 months
* Values of 13 or more wrap around to the following calendar year.
generate start_month_35 = start_month+3.5
replace start_month_35 = start_month_35 - 12 if start_month_35 >= 13
label var start_month_35 "Start month of the Black Death + 3.5 months"

* Duration of the Black Death in the city, in months, counting both the
* start month and the end month.
* The year gap is now multiplied by 12, so an epidemic spanning more than one
* year change is measured correctly; the previous code always added exactly 12.
* The duration is left missing when the end year is missing or earlier than
* the start year.
generate duration = 12*(end_year - start_year) + end_month - start_month + 1 if end_year >= start_year & end_year != .
* Durations below 3 months are set to missing.
replace duration = . if duration < 3
label var duration "Duration of the Black Death (months) in the city"

save finaljjk4, replace

*******************************************
*** OTHER MARKET ACCESS SHOCK VARIABLES ***
*******************************************

*** Average mortality rate of other cities within the bottom 10% of Euclidean distance to the city. ***

* We keep the 165 cities of the main regression sample.
use finaljjk4, clear
keep if samplemort == 1 & year == 1300
keep city_jjk city_id mortality
sort city_jjk
count
* 165
codebook city_jjk
* 165, ok
levelsof city_jjk, local(list_city_jjk)
rename city_jjk origin_city_name
rename city_id origin_city
sort origin_city
save MA_list_165, replace

* This is the matrix of travel costs; we keep the rows whose origin is one
* of the cities of the list.
use travelcosts_1801x1801, clear
rename ocity origin_city
rename dcity dest_city
sort origin_city
merge origin_city using MA_list_165
tabulate _merge
drop if _merge == 1
drop _merge
codebook origin_city dest_city
* ok, 165 x 1801 = 297165
sort origin_city_name dest_city
merge origin_city_name dest_city using MA_non_274
tabulate _merge
drop if _merge == 1
drop _merge
save ma_nom_165x1527, replace

* We keep some of the variables.
use ma_nom_165x1527, clear
keep origin_city dest_city rastervalu
sort origin_city dest_city
save ma_nom_165x1527_v2, replace

* We add the extrapolated mortality rates.
* Each of the 165 sample cities is paired with every city of
* death_rates_1801e (except itself).
use finaljjk4, clear
keep if samplemort == 1 & year == 1300
count
* 165
codebook city_id
* 165
keep countryname city_id longitude latitude entity*
rename longitude city_long
rename latitude city_lat
cross using death_rates_1801e
count
drop if city_id == city_id_1801e
count
generate origin_city = city_id
generate dest_city = city_id_1801e
sort origin_city dest_city
merge origin_city dest_city using ma_nom_165x1527_v2
tabulate _merge
drop _merge
* The value from the merged matrix replaces the extrapolated rate when available.
generate mort_1801eeo = mort_1801e
replace mort_1801eeo = rastervalu if rastervalu != .
geodist city_lat city_long lat_1801e long_1801e, gen(dist)
* Rank of each destination by distance within the origin city. The distance
* is put in the bysort key so that the within-city order is guaranteed.
bysort city_id (dist): generate rank = _n
rename city_id_1801e city_id_1801eeo
keep countryname* city_id dist rank mort_1801eeo pop_05 entity*
save matrix_for_shock_1801eeo, replace

* We use the matrix and obtain the pop-weighted average mortality rate of the other cities.
foreach X in 332 {
    use matrix_for_shock_1801eeo, clear
    * Destinations within X km of the origin city
    keep if dist <= `X'
    collapse (mean) mort_1801eeo [iw= pop_05], by(city_id)
    * The full variable name is used instead of an abbreviation.
    rename mort_1801eeo mortdist`X'
    sort city_id
    save shock_dist_`X'_1801eeo, replace
}

* We merge the distance-based mortality shock with the main data set.
use finaljjk4, clear
sort city_id
merge city_id using shock_dist_332_1801eeo
tabulate _merge
drop if _merge == 2
drop _merge
describe mortdist332
label var mortdist332 "Avg mortality other cities within the bottom 10% of Euclidean distance"
save finaljjk4, replace

*** Average mortality rate of other cities in the same state (1300) ***

* We keep the pairs whose two cities share the same 1300 entity and take the
* pop-weighted mean mortality per origin city.
use matrix_for_shock_1801eeo, clear
keep if entity1300_old == entity1300_old_1801e
collapse (mean) mort_1801eeo [iw= pop_05], by(city_id)
rename mort_1801eeo mort_entity1300_old
sort city_id
save mort_entity1300_old_1801eeo, replace

* We merge with the main data set
use finaljjk4, clear
sort city_id
merge city_id using mort_entity1300_old_1801eeo
tabulate _merge
drop if _merge == 2
drop _merge
label var mort_entity1300_old "Avg mortality other cities in same state (1300)"
save finaljjk4, replace

*********************************
* OTHER MARKET ACCESS VARIABLES *
*********************************

** INCLUDING "OWN" CITY IN THE MARKET ACCESS CALCULATIONS *

use travelcosts_1801x1801, clear
rename ocity origin_city
rename dcity dest_city
sort dest_city
merge dest_city using MA
tabulate _merge
drop _merge

* Travel cost assigned to the pair of a city with itself, for each cost measure.
generate own_cost1 = .20999999
generate own_speed1 = .5
generate own_cost2 = .50999999
generate own_speed2 = .23999999
foreach C in cost1 cost2 speed1 speed2 {
    replace travelcost_`C' = own_`C' if origin == dest
}
sort origin dest
order origin dest pop* mortality mortality274

* Market access term of each pair: 1300 population over cost to the power 3.8.
foreach C in cost1 cost2 speed1 speed2 {
    generate ma1300ownnew_`C'_38 = pop1300/(travelcost_`C')^(3.8)
}

* Sum over destinations, then logs.
collapse (sum) ma*, by(origin)
foreach V of varlist ma*_*_* {
    generate l`V' = log(`V')
}
foreach C in cost1 cost2 speed1 speed2 {
    label var ma1300ownnew_`C'_38 "Market access including own city (costs = `C')"
    label var lma1300ownnew_`C'_38 "Log market access including own city (costs = `C')"
}
rename origin city_id
sort city_id
save ma_1801x1801_ownnew, replace
count
* 1801

* We merge with the main data set
use finaljjk4, clear
sort city_id
merge city_id using ma_1801x1801_ownnew
tabulate _merge
drop if _merge == 2
drop _merge
save finaljjk4, replace

*** Market access to Europe incl. Eastern Europe + MENA ***

* Market access to European cities including from Eastern Europe
use travelcosts_1801x2279, clear
* So 1801 * (1801+478) = 4,104,479 observations.
rename ocity origin_city
rename dcity dest_city
sort dest_city
merge dest_city using MAtemp
tabulate _merge
drop _merge
* We drop own city
drop if origin == dest
sort origin dest
order origin dest pop*

* Destinations below 10K inhabitants in 1300 are excluded.
* NOTE(review): this restriction used to be applied after the market access
* terms were computed, so it had no effect on them. It is now applied first,
* which changes the values of the east10 variables - confirm against the
* published results before adopting.
replace pop1300 = . if pop1300 < 10
summarize pop1300 if pop1300 >= 1

* Market access term of each pair: 1300 population over cost to the power 3.8.
* Pairs with a missing population are ignored by the sum below.
foreach X in cost1 cost2 speed1 speed2 {
    generate ma1300east10_`X'_38 = pop1300/(travelcost_`X')^(3.8)
}
collapse (sum) ma*, by(origin)
foreach X of varlist ma*_*_* {
    generate l`X' = log(`X')
}
foreach X in cost1 cost2 speed1 speed2 {
    label var ma1300east10_`X'_38 "Market access to Europe (incl. East Europe)"
    label var lma1300east10_`X'_38 "Log market access to Europe (incl. East Europe)"
}
rename origin city_id
sort city_id
save ma_1801x2279_10, replace
count

* Market access to all European cities, including Eastern Europe
use ma_1801x2279_10, clear
* Adding the market access to 49 MENA cities
sort city_id
merge city_id using ma_1801x49
tabulate _merge
drop _merge
* The logs are dropped here and rebuilt after the sum.
drop lma*
foreach C in cost1 cost2 speed1 speed2 {
    generate ma1300both_`C'_38 = ma1300east10_`C'_38 + ma1300menaonly_`C'_38
}
foreach V of varlist ma*_*_* {
    generate l`V' = log(`V')
}
* The MENA-only components are not kept in this file.
drop *menaonly*
foreach C in cost1 cost2 speed1 speed2 {
    label var ma1300both_`C'_38 "Market access to Europe (incl. East Europe) + MENA"
    label var lma1300both_`C'_38 "Log market access to Europe (incl. East Europe) + MENA"
}
sort city_id
save ma_1801x2328_10, replace

* We merge both market access files with the main data set
use finaljjk4, clear
foreach F in ma_1801x2279_10 ma_1801x2328_10 {
    sort city_id
    merge city_id using `F'
    tabulate _merge
    drop if _merge == 2
    drop _merge
}
save finaljjk4, replace

****************************
* DISTANCE TO OTHER CITIES *
****************************

*** GENOA, VIENNA, PRAGUE, LEIPZIG, VISBY ***

* The same three steps are run for each destination:
* 1. save the coordinates of the destination (rows of year 1400);
* 2. compute the distance from every city to it and overwrite that file;
* 3. merge the distance into the main data set and add its log.
* The three lists below are parallel: file/variable stem, city name in the
* data, and name used in the variable labels.
local dest_stems   genoa  vienna prague leipzig visby
local dest_cities  GENOVA WIEN   PRAHA  LEIPZIG VISBY
local dest_labels  Genoa  Vienna Prague Leipzig Visby

forvalues i = 1/5 {
    local X    : word `i' of `dest_stems'
    local city : word `i' of `dest_cities'
    local lab  : word `i' of `dest_labels'

    * Coordinates of the destination
    use finaljjk4, clear
    keep if year == 1400
    keep if city_jjk == "`city'"
    keep longitude latitude
    rename longitude `X'_lon
    rename latitude `X'_lat
    save `X'_euclidean, replace

    * Distance from each city to the destination
    use finaljjk4, clear
    keep if year == 1400
    keep countryname city_jjk longitude latitude
    rename longitude city_lon
    rename latitude city_lat
    cross using `X'_euclidean
    geodist city_lat city_lon `X'_lat `X'_lon, gen(dist)
    collapse (min) dist, by(countryname city_jjk)
    rename dist `X'_euclidean
    sort countryname city_jjk
    save `X'_euclidean, replace

    * Merge with the main data set
    use finaljjk4, clear
    sort countryname city_jjk
    merge countryname city_jjk using `X'_euclidean
    tabulate _merge
    drop _merge
    codebook `X'_euclidean
    generate l`X'_euclidean = log(`X'_euclidean)
    label var `X'_euclidean "Euclidean distance to `lab'"
    label var l`X'_euclidean "Log Euclidean distance to `lab'"
    save finaljjk4, replace
    count

    * For Leipzig and Visby only: correlation with the distance to Messina
    if inlist("`X'", "leipzig", "visby") {
        correlate l`X'_euclidean lmessina_euclidean [w=pop1300] if year == 1400 & mortality != .
    }
}

***************************
*** PLAGUE REOCCURENCES ***
***************************

* We use the Biraben data set.
* We consider the two periods 1353-1400 and 1353-1600.
foreach X in 1400 1600 {
    use "BirabenFinal_stata12.dta", clear
    keep if YEAR >= 1353 & YEAR <= `X'
    rename LATITUDE bir_lat
    rename LONGITUDE bir_long
    rename YEAR bir_year
    keep bir_year Biraben_id bir_lat bir_long
    save biraben_`X', replace
    count
}

* We combine with the main data set.
* For each city of the mortality sample we count the Biraben events within
* 10, 25, 50 and 100 km, and build a dummy for at least one event.
foreach X in 1400 1600 {
    use finaljjk4, clear
    keep if samplemort == 1 & mortality != . & year == `X'
    count
    * 165
    rename longitude city_lon
    rename latitude city_lat
    * We create the distance to the Biraben events.
    cross using biraben_`X'
    * The longitude variable is referenced by its exact name, bir_long;
    * the previous bir_lon only resolved through variable abbreviation.
    geodist city_lat city_lon bir_lat bir_long, gen(dist)
    foreach N in 10 25 50 100 {
        generate reoccur`N'_`X' = (dist <= `N' & dist != .)
    }
    collapse (sum) reoccur*, by(city_id)
    foreach Y of varlist reoccur* {
        generate `Y'_yn = (`Y' >= 1 & `Y' != .)
        rename `Y' `Y'_sum
    }

    foreach N in 10 25 50 100 {
        label var reoccur`N'_`X'_yn "Dummy if reoccurence, `N' km, 1353-`X'"
        label var reoccur`N'_`X'_sum "Sum of reoccurences, `N' km, 1353-`X'"
    }
    sort city_id
    save reoccur_ctrls_`X', replace
    summarize *yn *sum
}

* We merge both periods with the main data set.
use finaljjk4, clear
* 165
foreach X in 1400 1600 {
    sort city_id
    merge city_id using reoccur_ctrls_`X'
    tabulate _merge
    tabulate city_jjk if _merge == 1
    drop _merge
}
save finaljjk4, replace

***************
*** FAMINES ***
***************

*** FAMINES AT THE COUNTRY LEVEL ***

* We created "Famines_Wikipedia.csv" ourselves
* Sources are described in the web appendix. 

* Country-level famine counts from "Famines_Wikipedia.csv" for three windows:
* 1300-1352, 1353-1400 and 1353-1600. Each pass saves one file,
* famines_country_<start>_<end>, holding the number of famines and of severe
* famines per country.
foreach WINDOW in "1300 1352" "1353 1400" "1353 1600" {
    local y0 : word 1 of `WINDOW'
    local y1 : word 2 of `WINDOW'
    import delimited "Famines_Wikipedia.csv", clear
    gen countryname = location
    replace countryname = "The Netherlands" if countryname == "Low Countries"
    * Europe-wide and sub-national entries are left out of the country counts
    drop if inlist(countryname, "Europe", "Languedoc", "Nordic Countries", "Southern Lombardy", "Venice", "England")
    gen famine = 1
    gen famine_sev = 1 if severe == 1
    keep if inrange(year, `y0', `y1')
    tab countryname
    * (sum) treats the missing famine_sev of non-severe events as zero
    collapse (sum) famine*, by(countryname)
    rename famine fam_cntry_`y0'`y1'
    rename famine_sev fam_cntry_sev_`y0'`y1'
    sort countryname
    save famines_country_`y0'_`y1', replace
}

* Add the three country-level famine counts to the main data set.
use finaljjk4, clear
* 165
foreach WINDOW in 1300_1352 1353_1400 1353_1600 {
    sort countryname
    merge countryname using famines_country_`WINDOW'
    tab _m
    tab city_jjk if _m == 1
    * Countries that appear only in the famine file are discarded
    drop if _m == 2
    drop _m
}
label var fam_cntry_13001352 "Number of famines in 1300-1352"
label var fam_cntry_sev_13001352 "Number of severe famines in 1300-1352"
label var fam_cntry_13531400 "Number of famines in 1353-1400"
label var fam_cntry_sev_13531400 "Number of severe famines in 1353-1400"
label var fam_cntry_13531600 "Number of famines in 1353-1600"
label var fam_cntry_sev_13531600 "Number of severe famines in 1353-1600"
save finaljjk4, replace

*** FAMINES AT A LOWER LEVEL ***

* Quick look at the locations recorded up to 1353
import delimited "Famines_Wikipedia.csv", clear
tab location if year <= 1353
* All countries

* Famine counts for the sub-national locations (Languedoc, Nordic Countries,
* Southern Lombardy, Venice, England) for the windows 1300-1352, 1353-1400
* and 1353-1600. Each pass saves famines_subloc_<start>_<end>, keyed on subloc.
foreach WINDOW in "1300 1352" "1353 1400" "1353 1600" {
    local y0 : word 1 of `WINDOW'
    local y1 : word 2 of `WINDOW'
    import delimited "Famines_Wikipedia.csv", clear
    gen countryname = location
    keep if inlist(countryname, "Languedoc", "Nordic Countries", "Southern Lombardy", "Venice", "England")
    tab countryname
    gen famine = 1
    gen famine_sev = 1 if severe == 1
    keep if inrange(year, `y0', `y1')
    tab countryname
    collapse (sum) famine*, by(countryname)
    rename famine fam_subloc_`y0'`y1'
    rename famine_sev fam_subloc_sev_`y0'`y1'
    rename countryname subloc
    sort subloc
    save famines_subloc_`y0'_`y1', replace
}

*** WE COMBINE THE INFORMATION ***

* Builds the city-level famine counts fam_13001352, fam_13531400 and
* fam_13531600 by summing the country-level, sub-location and Europe-wide
* counts, then drops the components.
use finaljjk4, clear 
* We create a sublocation variable. 
gen subloc = ""
replace subloc = "Nordic Countries" if city_jjk == "OSLO" | city_jjk == "STOCKHOLM"
replace subloc = "Southern Lombardy" if city_jjk == "MILANO" | city_jjk == "CREMONA"
replace subloc = "Venice" if city_jjk == "VENEZIA"
* This adds information on the sublocation name
* (the update option lets the using files fill in values that are missing in the master)
sort countryname city_jjk
merge countryname city_jjk using england_cities, update
tab _m
drop _m
sort countryname city_jjk
merge countryname city_jjk using languedoc_cities, update
tab _m
drop _m
* We add information on the sublocations
* The 1300 window file is saved above as famines_subloc_1300_1352; the
* previous name (..._1300_1353) did not match any file created by this do-file.
sort subloc 
merge subloc using famines_subloc_1300_1352
tab _m
drop _m
sort subloc 
merge subloc using famines_subloc_1353_1400
tab _m
drop _m
sort subloc 
merge subloc using famines_subloc_1353_1600
tab _m
drop _m
* We add information from Europe *
* count is a constant key, so every city receives the Europe-wide counts
gen count = 1
foreach X in 1352 {
sort count 
merge count using famines_europe_1300_`X'
tab _m
drop _m
}
foreach X in 1353 {
sort count 
merge count using famines_europe_`X'_1600
tab _m
drop _m
}
drop count
* rsum treats missing components as zero.
* NOTE(review): the 1300-1352 total leaves out the sub-location count and the
* 1353-1400 total leaves out a Europe-wide count (no such file is merged) --
* confirm that these omissions are intended.
egen fam_13531400 = rsum(fam_cntry_13531400 fam_subloc_13531400)
egen fam_13001352 = rsum(fam_cntry_13001352 fam_euro_13001352)
egen fam_13531600 = rsum(fam_cntry_13531600 fam_euro_13531600 fam_subloc_13531600)
drop fam_cntry* fam_sublo* fam_euro*
label var fam_13531400 "Number of famines in 1353-1400"
label var fam_13001352 "Number of famines in 1300-1352"
label var fam_13531600 "Number of famines in 1353-1600"
drop subloc
save finaljjk4, replace

**************************
*** HISTORICAL WETNESS ***
**************************

* See Web Appendix for data sources.
* Information on precipitation just before 1315-1317 famine *
* The values are an index number (same as used in the paper where we found the data). It is called the Palmer Drought Severity Index. Higher values correspond to wetter soil during the months of June, July, and August. Lower values mean drier soil during those months.
* HIGH = wet in summer, LOW = dry. 
use cities_precipitation.dta, clear
keep countryname city_jjk precip*
sort countryname city_jjk
save cities_precipitation2, replace

* We combine with the main data set.
use finaljjk4, clear 
* Adding precipitation data 1314-1317
count
sort countryname city_jjk
merge countryname city_jjk using cities_precipitation2
tab _m
tab city_jjk if _m == 1
drop _m
* We create the wetness variable: row mean of the index over 1314-1316
egen precip131416 = rmean(precip1314-precip1316) 
sum precip131416 if year == 1300, d
* Index of 2 up to (but excluding) 3: unusual moist spell
* Index of 3 or more: very moist spell
* Extra wetness = index in excess of 2, zero below 2 (missing index stays missing)
gen extrawet131416 = 0 if precip131416 < 2
replace extrawet131416 = precip131416-2 if precip131416 >= 2
sum extrawet131416, d
* Stata treats missing as larger than any number, so the dummies are built only
* where the index is observed; otherwise a missing index would be coded as
* "very moist". The upper bound of the moist class is 3 so that values in
* [2.99, 3) are not left out of both classes.
gen wet131416_moist = (precip131416 >= 2 & precip131416 < 3) if !missing(precip131416)
tab wet131416_moist
gen wet131416_vmoist = (precip131416 >= 3) if !missing(precip131416)
tab wet131416_vmoist
desc extrawet131416
desc wet131416_*moist
* Drops the raw yearly indices and their mean (a variable range in data set order)
drop precip1314-precip131416
label var extrawet131416 "Extra wetness measure 1314-1316"
label var wet131416_moist "Unusual moist spell 1314-1316"
label var wet131416_vmoist "Very unusual moist spell 1314-1316"
save finaljjk4, replace

***************************
*** JEWISH PERSECUTIONS ***
***************************

* This is the data set from the following source:
* Remi Jedwab & Noel D. Johnson & Mark Koyama, 2019. "Negative shocks and mass persecutions: evidence from the Black Death," Journal of Economic Growth, Springer, vol. 24(4), pages 345-395, December
use jjk_jews_new2, clear
* For presence (pres) and persecution (pers): the combined dummy is 1 if either
* of the two source variables (_x, _ajk) equals 1, 0 otherwise, and missing
* when both source variables are missing.
foreach KIND in pres pers {
    tab jew`KIND'13471352_x
    tab jew`KIND'13471352_ajk
    gen jew`KIND'bd = (jew`KIND'13471352_x == 1 | jew`KIND'13471352_ajk == 1) if !(jew`KIND'13471352_x == . & jew`KIND'13471352_ajk == .)
    tab jew`KIND'bd, m
}
* Only the city id and the four persecution controls are kept.
keep city_id jewpresbd jewpersbd bdpogrom_yn_x bdexpulsion_yn_x
sort city_id
save jewpersectrls, replace

* We add to the main data set
use finaljjk4, clear 
* We add the data. 
sort city_id 
merge city_id using jewpersectrls 
tab _m
tab city_jjk if _m == 1
drop if _m == 2
drop _m
* SION is coded as having no Jewish presence.
replace jewpresbd = 0 if city_jjk == "SION"
tab jewpresbd if year == 1300, m
* Presence is set to zero for all cities of these three countries.
* The original wrapped this statement (and the next one) in a loop over four
* variables whose loop macro was never used, so the same replace simply ran
* four times; a single statement gives the identical result.
* NOTE(review): if the loop was meant to zero each of jewpresbd, jewpersbd,
* bdpogrom_yn_x and bdexpulsion_yn_x, that was never what the code did -- confirm.
replace jewpresbd = 0 if countryname == "United Kingdom" | countryname == "Ireland" | countryname == "Norway"
* Cities with no presence information are treated as having no Jewish community. 
replace jewpresbd = 0 if jewpresbd == . 
* Missing persecution is set to zero; pogrom and expulsion dummies are zero
* whenever there was no persecution.
replace jewpersbd = 0 if jewpersbd == .
replace bdpogrom_yn_x = 0 if jewpersbd == 0
replace bdexpulsion_yn_x = 0 if jewpersbd == 0
* These cities had persecutions but were not in the original data set.
replace bdpogrom_yn_x = 1 if city_jjk == "BASEL" | city_jjk == "CHAMBERY"
replace bdexpulsion_yn_x = 1 if city_jjk == "BASEL" 
replace bdexpulsion_yn_x = 0 if city_jjk == "CHAMBERY"
save finaljjk4, replace

****************************************
*** MORTALITY OF CITIES WITHIN 50 KM ***
****************************************

* Cities with a (Christakos) mortality rate: we save their coordinates and
* mortality so they can be paired with the cities lacking a rate.
use finaljjk4, clear
keep if mortality != . & year == 1300
count
* 274, ok
keep city_jjk longitude latitude mortality
ren longitude christakos_long
ren latitude christakos_lat
save christakos_coord, replace

* We obtain the mortality rates based on 50km. 
* For every city without a mortality rate, we take the rate of the nearest
* city with a rate, provided it lies within `X' (units as returned by geodist).
foreach X in 50 {
use finaljjk4, clear
keep if year == 1300 & mortality == .
count
keep city_id longitude latitude
ren longitude missmort_long
ren latitude missmort_lat
* All pairs (city without rate) x (city with rate)
cross using christakos_coord
geodist missmort_lat missmort_long christakos_lat christakos_long, gen(dist)
keep if dist < `X'
* Sorting on dist inside the by-group guarantees that _n == 1 is the nearest
* city, instead of relying on the order left behind by an earlier sort.
bysort city_id (dist): keep if _n == 1
ren mortality mortality_`X'
keep city_id mortality_`X'
sort city_id
save missmort_christakos_`X', replace
}

use finaljjk4, clear
foreach X in 50 {
sort city_id
merge city_id using missmort_christakos_`X'
tab _m
drop _m
* Label typo fixed ("Motality") and the radius now follows the loop value
label var mortality_`X' "Mortality based on cities within `X' km"
}
save finaljjk4, replace

*********************************************
*** MORTALITY OF CITIES IN THE SAME STATE ***
*********************************************

* Mortality averaged over the cities of the same entity1300, weighted by
* pop1300_05, computed from the 1300 cross-section of cities with a
* mortality rate.
use finaljjk4, clear
keep if year == 1300
keep if mortality != .
count
* 274
codebook pop1300_05
collapse (mean) mortality [iw=pop1300_05], by(entity1300)
ren mortality mortality_state
sort entity1300
save mortality_state, replace

* The state-level average is attached to every city of the state.
use finaljjk4, clear
sort entity1300
merge entity1300 using mortality_state
tab _m
drop _m
* Label typo fixed ("Motality")
label var mortality_state "Mortality based on cities in the same state"
save finaljjk4, replace

*********************************************
*** MARKET ACCESS, EXCLUDING CLOSE CITIES ***
*********************************************

* For each inner cutoff (25, 50) and outer cutoff (86, 219, 332): average of
* mort_1801eeo, weighted by pop_05, over the pairs of matrix_for_shock_1801eeo
* whose distance lies between the two cutoffs. One file per combination.
foreach INNER in 25 50 {
    foreach OUTER in 86 219 332 {
        use matrix_for_shock_1801eeo, clear
        keep if inrange(dist, `INNER', `OUTER')
        collapse (mean) mort_1801eeo [iw= pop_05], by(city_id)
        rename mort_1801e mortdist`OUTER'_no`INNER'
        sort city_id
        save shock_dist_`OUTER'_1801eeo_no`INNER', replace
    }
}

* The six averages are merged into the main data set and labelled.
use finaljjk4, clear
foreach INNER in 25 50 {
    foreach OUTER in 86 219 332 {
        sort city_id
        merge city_id using shock_dist_`OUTER'_1801eeo_no`INNER'
        tab _m
        drop _m
        label var mortdist`OUTER'_no`INNER' "Avg mortality between `INNER' and `OUTER' km"
    }
}
save finaljjk4, replace

****************
*** EMP DATA ***
****************

* See the Web appendix for data sources. 

* Countryname2 *
* Builds the city -> countryname2 crosswalk. countryname2 is the sort key of
* emp_data below.
import delimited "emp.csv", clear 
gen countryname2 = countryname
* NOTE(review): "Northern Franc" looks truncated; presumably this is how the
* value is stored in emp.csv -- confirm.
replace countryname2 = "France South" if countryname2 == "France"
replace countryname2 = "France Centre" if countryname2 == "Central France"
replace countryname2 = "France North" if countryname2 == "Northern Franc"
replace countryname2 = "Spain South" if countryname2 == "Southern Spain"
replace countryname2 = "Spain Centre" if countryname2 == "Central Spain"
replace countryname2 = "Spain North" if countryname2 == "Spain"
replace countryname2 = "Italy South" if countryname2 == "Southern Italy"
replace countryname2 = "Italy North" if countryname2 == "Italy"
replace countryname2 = "England" if countryname2 == "United Kingdom"
* The two assignments below were swapped in the original: Aberdeen and
* Edinburgh are Scottish cities, whereas Drogheda, Dublin, Kilkenny, New Ross,
* Waterford and Cork are Irish.
replace countryname2 = "Scotland" if city_jjk == "ABERDEEN" | city_jjk == "EDINBURGH"
replace countryname2 = "Ireland" if city_jjk == "DROGHEDA" | city_jjk == "DUBLIN" | city_jjk == "KILKENNY" | city_jjk == "NEW-ROSS" | city_jjk == "WATERFORD" | city_jjk == "CORK"
keep city_jjk countryname2
sort city_jjk
save countryname2, replace

* EMP data *
import excel "emp_data.xlsx", sheet("Sheet1") firstrow clear
sort countryname2
save emp_data, replace

**************************
*** REFORESTATION DATA ***
**************************

* Pixel-level panel of land shares: the yearly pixel files are stacked and
* reshaped to one row per pixel (long_land, lat_land), with one landsh
* column per year.
* See Web Data Appendix for the sources.
use land_pixels_1100, clear
foreach YR in 1200 1300 1360 1400 1500 1600 1700 1750 1800 1850 {
    append using land_pixels_`YR'
}
tab year
reshape wide landsh, i(long_land lat_land) j(year)
count
* 92,978
* Pixel id = rank of the pixel when sorted by longitude, then latitude
sort long_land lat_land
gen landuse_id = _n
sort landuse_id
save land_pixels_all, replace

* To create the distance, we only select the coordinates. 
use land_pixels_all, clear
* Keeps the id, longitude and latitude variables (wildcard match on names)
keep *id lon* lat*
sort landuse_id 
save land_pixels_all_coord, replace
* We export to GIS 
* We then use GIS to select pixels within our region 
* The two GIS exports (west and east) are stacked into one list of pixel ids
import delimited "pixels_west.csv", clear 
keep landuse_id
gen west = 1
sort landuse_id
save pixels, replace
import delimited "pixels_east.csv", clear 
keep landuse_id
gen east = 1
append using pixels
keep if west == 1 | east == 1
sort landuse_id
save pixels, replace
count
* 59704

* We keep the pixels that belong to one of the 13 countries with sample cities *
use land_pixels_all_coord, clear
count
* 92978
sort landuse_id
merge landuse_id using pixels
tab _m
* Only pixels present in both the coordinate file and the GIS selection are kept
keep if _m == 3
drop _m
sort landuse_id
save land_pixels_all_coord_13, replace

* We obtain the distance from each city to each pixel
* Since it takes time, we divide the cities into two samples
* Subsample 1
*use finaljjk4, clear
*keep if samplemort == 1 & year == 1300
*count
** 165
*keep city_id longitude latitude 
*keep if _n <= 82
*count
*ren longitude city_lon
*ren latitude city_lat
*cross using land_pixels_all_coord_13
*count
*geodist city_lat city_lon lat_land long_land, gen(dist)
*save dist2landuse, replace
* Subsample 2 + we merge the two 
*use finaljjk4, clear
*keep if samplemort == 1 & year == 1300
*count
* 165
*keep city_id longitude latitude 
*keep if _n > 82
*count
*ren longitude city_lon
*ren latitude city_lat
*cross using land_pixels_all_coord_13
*count
*geodist city_lat city_lon lat_land long_land, gen(dist)
*append using dist2landuse
*drop *lon* *lat*
*sort landuse_id 
*merge landuse_id using land_pixels_all
*tab _m
*drop _m
*drop *land
*save dist2landuse, replace

* From the city-pixel distance file, keep the pixels within the radius of
* each city and average their land shares by city and year. The yearly
* averages are stored as anthro_tech_<radius>k_<year>.
foreach RADIUS in 10 {
    use dist2landuse, clear
    keep if dist <= `RADIUS'
    collapse (mean) landsh*, by(city_id)
    count
    foreach YR in 1100 1200 1300 1360 1400 1500 1600 1700 1750 1800 1850 {
        rename landsh`YR' anthro_tech_`RADIUS'k_`YR'
    }
    sort city_id
    save landsh_wi`RADIUS', replace
}
* 165

* Country population data *
* See Web Data Appendix for the sources. 
import excel "country level population 01022019.xlsx", sheet("stata") firstrow clear
* A "c" suffix marks the population variables as country-level
foreach X of varlist pop* {
ren `X' `X'c
}
ren country_orig countryname
ren mortestnew mortalityc
keep countryname pop* mortalityc
replace countryname = "Czech Republic" if countryname == "Czechslovakia"
drop if countryname == ""
sort countryname
save countrypops, replace

* We combine with the main data set 
use finaljjk4, clear
foreach X in 10 {
sort city_id
merge city_id using landsh_wi`X'
tab _m
drop if _m == 2
drop _m
}
* We create the main variables
* For each end year Z, with base year B (1100 for 1200, 1200 for 1300, and
* 1300 for all later years):
*   changeforZ_10k_tech   = (share_Z - share_B)*100            absolute change
*   changeforZ_10k_2_tech = (share_Z - share_B)/share_B*100    pct change
* Fixes relative to the earlier version:
*   - the absolute change was written a - b*100, i.e. the base share (not the
*     difference) was multiplied by 100; the difference is now parenthesized;
*   - the 1750 variables used the 1700 share; they now use the 1750 share;
*   - labels stated a 1300 base for every variable, including the 1200 and
*     1300 ones; they now report the base year actually used.
* The bysort countrycity prefix was dropped: the expressions use only
* same-row values, so grouping had no effect on them.
local base1200 1100
local base1300 1200
foreach Z in 1360 1400 1500 1600 1700 1750 1800 1850 {
local base`Z' 1300
}
foreach Y in tech {
foreach X in 10k {
foreach Z in 1200 1300 1360 1400 1500 1600 1700 1750 1800 1850 {
local B `base`Z''
gen changefor`Z'_`X'_`Y' = (anthro_`Y'_`X'_`Z'-anthro_`Y'_`X'_`B')*100
gen changefor`Z'_`X'_2_`Y' = (anthro_`Y'_`X'_`Z'-anthro_`Y'_`X'_`B')/anthro_`Y'_`X'_`B'*100
label var changefor`Z'_`X'_`Y' "Absolute change in forest cover share within 10km, `B'-`Z'"
label var changefor`Z'_`X'_2_`Y' "Pct change in forest cover within 10km, `B'-`Z'"
}
}
}
foreach Y in tech {
foreach X in 10k {
foreach Z in 1200 1300 1360 1400 1500 1600 1700 1750 1800 1850 {
label var anthro_`Y'_`X'_`Z' "Forest cover share within 10km, `Z'"
}
}
}
* Country totals are merged in; countries found only in countrypops are dropped.
sort countryname
merge countryname using countrypops
tab _m
drop if _m == 2
drop _m
* Country-level population growth controls, in percent
gen changepop11001200c = (pop1200c-pop1100c)/pop1100c*100
gen changepop12001300c = (pop1300c-pop1200c)/pop1200c*100
label var changepop11001200c "Pct pop growth of the city's country, 1100-1200"
label var changepop12001300c "Pct pop growth of the city's country, 1200-1300"
* Growth from 1300 to each later benchmark year
foreach ENDYR in 1353 1400 1450 1500 1600 1700 1750 {
    gen changepop1300`ENDYR'c = (pop`ENDYR'c-pop1300c)/pop1300c*100
    label var changepop1300`ENDYR'c "Pct pop growth of the city's country, 1300-`ENDYR'"
}
foreach YR in 1100 1200 1300 1353 1400 1450 1500 1600 1700 1750 1800 1850 {
    label var pop`YR'c "Country population `YR'"
}
drop mortalityc
save finaljjk4, replace

******************************************
*** DATA FOR THE HETEROGENEITY RESULTS ***
******************************************

* We use our main data set and create simplified variable names and add other variables.
* Output: data_for_het (main-sample cities, years 1300-1850).

use finaljjk4, clear
* Coastal variables 
* The country and state maxima are computed before the sample restriction,
* i.e. over all cities of the data set.
gen coast = dist2AMNB_10
bysort countryname: egen maxcoast_cntry = max(coast)
bysort entity1300_old: egen maxcoast_state = max(coast)
* We keep the main sample
keep if samplemort == 1 & year >= 1300 & year <= 1850
count
* 165
* We modify the road and river variables
gen romanrd = (DMajorRomRoad_10 == 1 | DAnyRomRoad_10 == 1 | DMajRomIntersection_10 == 1 | DAnyRmRdIntersection_10 == 1)
gen medierd = (dist2landroute_10 == 1 | dist2landrouteint_10 == 1)
gen anyrd = (romanrd == 1 | medierd == 1)
gen rd_int = (DMajRomIntersection_10 == 1 | DAnyRmRdIntersection_10 == 1 | dist2landrouteint_10 == 1)
gen rd_noint = (anyrd == 1 & rd_int == 0)
* river is only generated on the next line; the earlier version referred to
* it here and worked solely because Stata expanded the abbreviation "river"
* to rivers_10. The full name is used so the line no longer depends on
* variable abbreviation.
gen transp_all = (coast == 1 | rivers_10 == 1 | rd_int == 1 | rd_noint == 1)
gen river = rivers_10
* We create simple elevation variables 
* The cutoffs are hard-coded (presumably the mean and the 50th/75th/90th
* percentiles shown by the summarize below -- confirm).
* Stata treats missing as larger than any number, so the dummies are left
* missing when elevation is missing rather than being set to 1.
sum elevation if year == 1300, d
gen lelevation = log(elevation)
gen elevmean = (elevation >= 148.8121) if !missing(elevation)
gen elev50 = (elevation >= 62) if !missing(elevation)
gen elev75 = (elevation >= 185) if !missing(elevation)
gen elev90 = (elevation >= 418) if !missing(elevation)
* We modify other variables
gen invcerealclosest = -cerealclosest
gen lpop1353 = log(pop1353)
gen inst_all = (monarchy == 1 | Bcapital == 1 | representative1300 == 1 | parliament1300_yn == 1)
gen repre2 = (representative1300 == 1 | parliament1300_yn == 1)
* We keep the main sample cities 
keep if samplemort == 1
* Changes in the variables 
* Z-scores of the population-change variables, using the mean and standard
* deviation over all remaining observations
egen mean_changepop1400 = mean(changepop1400)
foreach Z in 1500 1600 1700 1750 2015min {
egen mean_changepop1300`Z' = mean(changepop1300`Z')
}
egen sd_changepop1400 = sd(changepop1400)
foreach Z in 1500 1600 1700 1750 2015min {
egen sd_changepop1300`Z' = sd(changepop1300`Z')
}
gen changepop1400_std = (changepop1400-mean_changepop1400)/sd_changepop1400
foreach Z in 1500 1600 1700 1750 2015min {
gen changepop1300`Z'_std = (changepop1300`Z'-mean_changepop1300`Z')/sd_changepop1300`Z'
}
drop mean_* sd_*
save data_for_het, replace 

*****************************
*** DMV ANALYSIS DATA SET ***
*****************************

* See the web appendix for details on the sources used. 

* Database with the number of DMVs per county
* Main source: The Beresford website
* County names are harmonized, then the DMV counts are summed by county.
use numberdmvs, clear
replace county = "Devon" if county == "Devonshire"
replace county = "YorkshireER" if county == "Yorkshire, East Riding"
replace county = "YorkshireNR" if county == "Yorkshire, North Riding"
replace county = "YorkshireWR" if county == "Yorkshire, West Riding"
* NOTE(review): Middlesex is folded into YorkshireWR here and in every other
* copy of this recoding; this looks like a copy-paste slip -- confirm the
* intended target county.
replace county = "YorkshireWR" if county == "Middlesex"
* Isle of Wight was part of Hampshire then
replace county = "Hampshire" if county == "Isle of Wight"
collapse (sum) countdmvs, by(county)
sort county
save numberdmvs2, replace

* Within and outside 10 km
* The same county harmonization and county totals are applied to the "in"
* and the "out" file of each radius; totals are stored as
* countdmvs_in<radius> and countdmvs_out<radius>.
local sides in out
foreach RADIUS in 10 {
    foreach SIDE of local sides {
        use numberdmvs_`SIDE'`RADIUS', clear
        replace county = "Devon" if county == "Devonshire"
        replace county = "YorkshireER" if county == "Yorkshire, East Riding"
        replace county = "YorkshireNR" if county == "Yorkshire, North Riding"
        replace county = "YorkshireWR" if county == "Yorkshire, West Riding"
        replace county = "YorkshireWR" if county == "Middlesex"
        * Isle of Wight was part of Hampshire then
        replace county = "Hampshire" if county == "Isle of Wight"
        collapse (sum) countdmvs_`SIDE'`RADIUS' = countdmvs, by(county)
        sort county
        save numberdmvs2_`SIDE'`RADIUS', replace
    }
}

* Data on the type
* Same county-name harmonization as for the DMV counts; the data are not
* collapsed here, only sorted by county and saved.
use numbertypesdmvs, clear
replace county = "Devon" if county == "Devonshire"
replace county = "YorkshireER" if county == "Yorkshire, East Riding"
replace county = "YorkshireNR" if county == "Yorkshire, North Riding"
replace county = "YorkshireWR" if county == "Yorkshire, West Riding"
* NOTE(review): Middlesex recoded to YorkshireWR -- confirm this is intended.
replace county = "YorkshireWR" if county == "Middlesex"
* Isle of Wight is merged into Hampshire
replace county = "Hampshire" if county == "Isle of Wight"
sort county
save numbertypesdmvs2, replace

* County-level population data
* See the Web Appendix for the sources
import excel "Pop Correction.xlsx", sheet("Base Population Data") firstrow clear
keep county pop*
* A "new" suffix is appended to the three benchmark-year populations
foreach YR in 1086 1290 1377 {
    rename pop`YR' pop`YR'new
}
sort county
save newpop, replace
* 1801 census file, sorted by county for the later merge
import excel "census1801.xlsx", sheet("Sheet1") firstrow clear
sort county
save pop1801new, replace

* Average mortality rate from our data
* Cities are matched to counties via cities_counties_mark; mortality is then
* averaged by county with pop1300_05 as weight.
use finaljjk4, clear
sort city_id
merge city_id using cities_counties_mark
tab _m
tab countryname if _m == 3
keep if _m == 3
drop _m
tab year
* 180 cities
collapse (mean) mortality [pw=pop1300_05], by(county)
rename mortality avgmort
sort county
save avgmortcounty, replace
* Second copy keyed on county_mark (the data in memory are exactly what was
* just saved, so no reload is needed)
rename county county_mark
sort county_mark
save avgmortcounty2, replace

* Total city pop from our data
* City populations are summed by county and year, then reshaped to one row
* per county with one upop column per year.
use finaljjk4, clear
sort city_id
merge city_id using cities_counties_mark
tab _m
tab countryname if _m == 3
keep if _m == 3
drop _m
tab year
* 180 cities
* pop1353 is rebuilt as the 1300 population net of mortality274 percent
drop pop1353
gen pop1353 = pop1300/100*(100-mortality274)
keep pop pop1353 county year
collapse (sum) pop pop1353, by(county year)
rename pop upop
reshape wide upop, i(county) j(year)
rename pop1353 upop1353
sort county
save upopcounty, replace
* Second copy keyed on county_mark (same data as just saved)
rename county county_mark
sort county_mark
save upopcounty2, replace
codebook
count if upop1300 != 0

* We start with the lost villages data set 
* See the Web Appendix for data sources. 
* All county-level files built above are merged onto it in turn; each merge
* is followed by a tabulation of the merge outcome.
use "plague_county_lost_villages.dta", clear
drop if county == ""
* Number of DMVs *
sort county
merge county using numberdmvs2
tab _m
tab county if _m == 1
drop _m
* In and outside *
foreach X in 10 {
sort county
merge county using numberdmvs2_in`X'
tab _m
drop _m
sort county
merge county using numberdmvs2_out`X'
tab _m
drop _m
* A county with a total DMV count but no in/out record gets a zero count
replace countdmvs_in`X' = 0 if countdmvs_in`X' == . & countdmvs != .
replace countdmvs_out`X' = 0 if countdmvs_out`X' == . & countdmvs != .
}
codebook countdmvs_in* countdmvs_out*
* By type *
sort county
merge county using numbertypesdmvs2
tab _m
drop _m
* ok
* We add the average mortality rate from our data 
* The city-based files are keyed on county_mark, matched here to the
* upper-cased county name
gen county_mark = upper(county)
sort county_mark
merge county_mark using avgmortcounty2
tab _m
tab county_mark if _m == 1
drop _m
* We add the population data *
sort county_mark
merge county_mark using upopcounty2
tab _m
tab county_mark if _m == 1
drop _m
sort county
merge county using newpop
tab _m
tab county_mark if _m == 1
* Counties found only in the population file are dropped
drop if _m == 2
drop _m
sort county
merge county using pop1801new
tab _m
tab county if _m == 1
tab county if _m == 2
drop if _m == 2
drop _m

** We modify the population and other variables **
* Logs of county acreage and of the DMV density
gen lCountyAcreage = log(CountyAcreage)
gen lDMVsper100Kac = log(DMVsper100Kac)
* Plague: indicator for an observed clergy mortality rate, and a short alias
gen Plague_mortality_clergy_yn = (Plague_mortality_clergy != .)
gen Plague_mortal = Plague_mortality_clergy
* Log population for each benchmark year
foreach YR in 1086 1290 1377 1801 {
    gen lpop`YR'new = log(pop`YR'new)
}
* Percentage changes in population between benchmark years
gen chgpop12901377new = (pop1377new-pop1290new)/pop1290new*100
gen chgpop10861290new = (pop1290new-pop1086new)/pop1086new*100
gen chgpop12901801new = (pop1801new-pop1290new)/pop1290new*100
gen chgpop12901756new = (pop1756new-pop1290new)/pop1290new*100
gen chgpop17561801new = (pop1801new-pop1756new)/pop1756new*100
* Z-scores: mean and sd are taken over counties other than Cornwall with an
* observed clergy mortality rate; the z-score is missing for other counties.
foreach V in chgpop12901377new chgpop12901756new chgpop10861290new {
    egen mean = mean(`V') if county != "Cornwall" & Plague_mortality_clergy != .
    egen std = sd(`V') if county != "Cornwall" & Plague_mortality_clergy != .
    gen `V'_std = (`V'-mean)/std
    drop mean std
}

** Correlation DMVs and each type **

* Total count rebuilt from the type counts; a zero total is set to missing
egen countdmvs_new = rsum(dmv dmh doubt shrunk shift)
replace countdmvs_new = . if countdmvs_new == 0
* Count implied by the density variable and the county acreage
gen countdmvs_old = DMVsper100Kacres*(CountyAcreage/100000)
corr countdmvs_old countdmvs_new
* 0.9967, so ok
* Round trip back to a density as a consistency check
gen test = countdmvs_old*100000/CountyAcreage
corr DMVsper100Kacres test
* ok
gen dmvh = dmv+dmh
gen nodmvh = shrunk + shif 
* Densities per 100,000 acres (suffix d) for each count
foreach V in dmv dmh doubt shrunk shif countdmvs_new dmvh nodmvh {
    gen `V'd = `V'*100000/CountyAcreage
}
corr DMVsper100Kacre countdmvs_newd
tab county if countdmvs_new != . & DMVsper100Kacres != . & Plague_mortal == .
tab county if countdmvs_new == . & DMVsper100Kacres != . & Plague_mortal == .
* Short names for the within/outside-10km counts, and their densities
rename countdmvs_in10 in10
rename countdmvs_out10 out10
foreach V in in10 out10 {
    gen `V'd = `V'*100000/CountyAcreage
}
* Same but standardized
* Z-scores of the three DMV densities, with mean and sd taken over counties
* with an observed clergy mortality rate (the z-score is missing elsewhere).
* The earlier version built in10d_std and out10d_std from the raw counts
* in10 and out10, although their names and labels ("Z-score Density of DMVs
* within/outside 10km") refer to the densities in10d and out10d; the
* densities are now the variables being standardized.
ren DMVsper100Kacres DMVsper100Kacre
foreach V in DMVsper100Kacre in10d out10d {
egen mean = mean(`V') if Plague_mortality_clergy != .
egen std = sd(`V') if Plague_mortality_clergy != .
gen `V'_std = (`V'-mean)/std
drop mean std
}

* Finally, we create the urbanization variables
* Urban share = summed city population (times 1000) over county population, in percent
gen urb1300 = upop1300*1000/pop1290*100
gen urb1750 = upop1750*1000/pop1756*100
* The 1750 share is capped at 100 (missing values are left untouched)
replace urb1750 = 100 if urb1750 >= 100 & urb1750 != .
gen chgurb13001750 = (urb1750-urb1300)
* Z-score over counties other than Cornwall with an observed clergy mortality rate
egen mean = mean(chgurb13001750) if county != "Cornwall" & Plague_mortality_clergy != .
egen std = sd(chgurb13001750) if county != "Cornwall" & Plague_mortality_clergy != .
gen chgurb13001750_std = (chgurb13001750-mean)/std
drop mean std
* We keep the variables we need and label them. 
* The wildcards below also keep the z-score and log versions of the variables.
keep county Plague_mortal* DMVsper100Kacre* in10d* out10d* lCountyAcreage urb1300 urb1750 chgurb13001750* chgpop* *pop*new
drop chgpop17561801new Plague_mortality_clergy_yn
foreach X in 1086 1290 1377 1801 {
label var pop`X'new "Population in `X'"
label var lpop`X'new "Log population in `X'"
}
label var pop1756new "Population in 1756"
label var chgpop12901377new "Pct change in pop 1290-1377"
label var chgpop10861290new "Pct change in pop 1086-1290"
label var chgpop12901801new "Pct change in pop 1290-1801"
label var chgpop12901756new "Pct change in pop 1290-1756"
label var chgpop12901377new_std "Z-score of Pct change in pop 1290-1377"
label var chgpop10861290new_std "Z-score of Pct change in pop 1086-1290"
label var chgpop12901756new_std "Z-score of Pct change in pop 1290-1756"
label var urb1300 "Urban share 1300"
label var urb1750 "Urban share 1750"
label var chgurb13001750 "Absolute change in the urban share 1300-1750"
label var chgurb13001750_std "Z-score of the absolute change in the urban share 1300-1750"
* Plague_mortal is a copy of Plague_mortality_clergy, hence the same label
label var Plague_mortal "Mortality"
label var Plague_mortality_clergy "Mortality"
label var lCountyAcreage "Log county acreage"
label var DMVsper100Kacre "Density of DMVs"
label var DMVsper100Kacre_std "Z-score Density of DMVs"
label var in10d "Density of DMVs within 10km"
label var out10d "Density of DMVs outside 10km"
label var in10d_std "Z-score Density of DMVs within 10km"
label var out10d_std "Z-score Density of DMVs outside 10km"
* Final data set for the DMV analysis
save dmvdata, replace




